diff --git a/setup.cfg b/setup.cfg index 88842cd9..52b58f75 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,7 +40,7 @@ install_requires = numpy numba pandas - pyarrow >= 13.0.0 + pyarrow >= 14.0.0 pyyaml >= 5.1 quivr >= 0.7.1 ray[default] diff --git a/thor/__init__.py b/thor/__init__.py index 3c1fef6d..11cc9a9b 100644 --- a/thor/__init__.py +++ b/thor/__init__.py @@ -1,14 +1,13 @@ from .version import __version__ from .config import * -from .constants import * from .orbits import * from .utils import * +from .range_and_transform import * +from .clusters import * from .projections import * from .orbit import * +from .orbit_determination import * from .orbit_selection import * -from .filter_orbits import * from .main import * -from .main_2 import * # TODO: remove when main is replaced -from .analysis import * logger = setupLogger(__name__) diff --git a/thor/analysis.py b/thor/analysis.py deleted file mode 100644 index 4e686bda..00000000 --- a/thor/analysis.py +++ /dev/null @@ -1,290 +0,0 @@ -import os - -import pandas as pd -from difi import analyzeLinkages, analyzeObservations - -__all__ = ["readOrbitDir", "analyzeTHOROrbit", "analyzeTHOR"] - - -def readOrbitDir(orbit_dir): - - projected_observations = pd.read_csv( - os.path.join(orbit_dir, "projected_observations.csv"), - index_col=False, - dtype={"obs_id": str}, - ) - - clusters = pd.read_csv(os.path.join(orbit_dir, "clusters.csv"), index_col=False) - - cluster_members = pd.read_csv( - os.path.join(orbit_dir, "cluster_members.csv"), - index_col=False, - dtype={"obs_id": str}, - ) - - iod_orbits = Orbits.from_csv( - os.path.join(orbit_dir, "iod_orbits.csv"), - ).to_df(include_units=False) - - iod_orbit_members = pd.read_csv( - os.path.join(orbit_dir, "iod_orbit_members.csv"), - index_col=False, - dtype={"obs_id": str}, - ) - - od_orbits = Orbits.from_csv( - os.path.join(orbit_dir, "od_orbits.csv"), - ).to_df(include_units=False) - - od_orbit_members = pd.read_csv( - os.path.join(orbit_dir, "od_orbit_members.csv"), - index_col=False, - dtype={"obs_id": str}, - ) - - recovered_orbits = Orbits.from_csv( - os.path.join(orbit_dir, "recovered_orbits.csv"), - ).to_df(include_units=False) - - recovered_orbit_members = pd.read_csv( - os.path.join(orbit_dir, "recovered_orbit_members.csv"), - index_col=False, - dtype={"obs_id": str}, - ) - - data_products = ( - projected_observations, - clusters, - cluster_members, - iod_orbits, - iod_orbit_members, - od_orbits, - od_orbit_members, - recovered_orbits, - recovered_orbit_members, - ) - return data_products - - -def analyzeTHOROrbit( - preprocessed_associations, - orbit_dir, - classes=None, - min_obs=5, - contamination_percentage=20, - metric="min_obs", - metric_kwargs={ - "min_obs": 5, - }, -): - - data_products = readOrbitDir(orbit_dir) - ( - projected_observations, - clusters, - cluster_members, - iod_orbits, - iod_orbit_members, - od_orbits, - od_orbit_members, - recovered_orbits, - recovered_orbit_members, - ) = data_products - - analysis_observations = projected_observations.merge( - preprocessed_associations, on="obs_id", how="left" - ) - - column_mapping = {"obs_id": "obs_id", "linkage_id": "cluster_id", "truth": "obj_id"} - - all_truths, findable_observations, summary = analyzeObservations( - analysis_observations, - classes=classes, - metric=metric, - **metric_kwargs, - column_mapping=column_mapping, - ) - - all_clusters, all_truths_clusters, summary_clusters = analyzeLinkages( - analysis_observations, - cluster_members, - all_truths=all_truths, - min_obs=min_obs, - 
contamination_percentage=contamination_percentage, - classes=classes, - column_mapping=column_mapping, - ) - for df in [all_clusters, all_truths_clusters, summary_clusters]: - df.insert(0, "component", "clustering") - - column_mapping["linkage_id"] = "orbit_id" - - all_iod_orbits, all_truths_iod, summary_iod = analyzeLinkages( - analysis_observations, - iod_orbit_members, - all_truths=all_truths, - min_obs=min_obs, - contamination_percentage=contamination_percentage, - classes=classes, - column_mapping=column_mapping, - ) - for df in [all_iod_orbits, all_truths_iod, summary_iod]: - df.insert(0, "component", "iod") - - all_od_orbits, all_truths_od, summary_od = analyzeLinkages( - analysis_observations, - od_orbit_members, - all_truths=all_truths, - min_obs=min_obs, - contamination_percentage=0.0, - classes=classes, - column_mapping=column_mapping, - ) - for df in [all_od_orbits, all_truths_od, summary_od]: - df.insert(0, "component", "od") - - all_recovered_orbits, all_truths_recovered, summary_recovered = analyzeLinkages( - analysis_observations, - recovered_orbit_members, - all_truths=all_truths, - min_obs=min_obs, - contamination_percentage=0.0, - classes=classes, - column_mapping=column_mapping, - ) - for df in [all_recovered_orbits, all_truths_recovered, summary_recovered]: - df.insert(0, "component", "od+a") - - summary = pd.concat([summary_clusters, summary_iod, summary_od, summary_recovered]) - summary.reset_index(inplace=True, drop=True) - - all_truths = pd.concat( - [all_truths_clusters, all_truths_iod, all_truths_od, all_truths_recovered] - ) - all_truths.reset_index(inplace=True, drop=True) - - all_linkages = pd.concat( - [all_clusters, all_iod_orbits, all_od_orbits, all_recovered_orbits] - ) - all_linkages.reset_index(inplace=True, drop=True) - - all_linkages = all_linkages[ - [ - "component", - "cluster_id", - "orbit_id", - "num_obs", - "num_members", - "pure", - "pure_complete", - "partial", - "mixed", - "contamination_percentage", - "found_pure", - "found_partial", - "found", - "linked_truth", - ] - ] - all_linkages.loc[all_linkages["cluster_id"].isna(), "cluster_id"] = "None" - all_linkages.loc[all_linkages["orbit_id"].isna(), "orbit_id"] = "None" - - summary["linkage_purity"] = 100 * (summary["pure_linkages"] / summary["linkages"]) - - return all_linkages, all_truths, summary - - -def analyzeTHOR( - preprocessed_associations, - out_dir, - min_obs=5, - contamination_percentage=20, - classes=None, - metric="min_obs", - metric_kwargs={"min_obs": 5}, -): - - # Read preprocessed observations from out dir - preprocessed_observations = pd.read_csv( - os.path.join(out_dir, "preprocessed_observations.csv"), - index_col=False, - dtype={"obs_id": str}, - ) - - # Merge with prepprocessed associations to create a set of 'analysis observations': - # observations that contain any previously known labels - analysis_observations = preprocessed_observations.merge( - preprocessed_associations, on="obs_id" - ) - - # Calculate which objects should be findable - column_mapping = {"obs_id": "obs_id", "truth": "obj_id", "linkage_id": "orbit_id"} - all_truths, findable_observations, summary = analyzeObservations( - analysis_observations, - classes=classes, - metric="min_obs", - column_mapping=column_mapping, - **metric_kwargs, - ) - - # Read the recovered orbits and orbit members - recovered_orbits = Orbits.from_csv(os.path.join(out_dir, "recovered_orbits.csv")) - - recovered_orbit_members = pd.read_csv( - os.path.join(out_dir, "recovered_orbit_members.csv"), - index_col=False, - dtype={"obs_id": 
str}, - ) - - # Calculate which objects were actually recovered - all_recovered_orbits, all_recovered_truths, recovered_summary = analyzeLinkages( - analysis_observations, - recovered_orbit_members, - all_truths=all_truths, - contamination_percentage=0.0, # Recovered orbits should be contamination free (no partial orbits) - classes=classes, - column_mapping=column_mapping, - ) - - # Read test_orbits file from out_dir - test_orbits_file = os.path.join(out_dir, "test_orbits_out.csv") - if not os.path.exists(test_orbits_file): - raise ValueError("Cannot find test_orbits_out.csv.") - else: - test_orbits = Orbits.from_csv(test_orbits_file) - - test_orbits_df = test_orbits.to_df(include_units=False) - - all_linkages_dfs = [] - all_truths_dfs = [] - summary_dfs = [] - - # Go through each test orbit directory and analyze the outputs - for orbit_id in test_orbits_df["test_orbit_id"].unique(): - - orbit_dir = os.path.join(out_dir, "orbit_{}".format(orbit_id)) - - all_linkages, all_truths, summary = analyzeTHOROrbit( - preprocessed_associations, - orbit_dir, - classes=classes, - min_obs=min_obs, - contamination_percentage=contamination_percentage, - metric="min_obs", - metric_kwargs=metric_kwargs, - ) - - for df in [all_linkages, all_truths, summary]: - df.insert(0, "test_orbit_id", orbit_id) - - all_linkages_dfs.append(all_linkages) - all_truths_dfs.append(all_truths) - summary_dfs.append(summary) - - all_linkages = pd.concat(all_linkages_dfs, ignore_index=True) - all_truths = pd.concat(all_truths_dfs, ignore_index=True) - summary = pd.concat(summary_dfs, ignore_index=True) - - test_orbit_analysis = (all_linkages, all_truths, summary) - run_analysis = (all_recovered_orbits, all_recovered_truths, recovered_summary) - return run_analysis, test_orbit_analysis diff --git a/thor/clusters.py b/thor/clusters.py index 546f6ea7..ef4c3d38 100644 --- a/thor/clusters.py +++ b/thor/clusters.py @@ -1,5 +1,18 @@ +import logging +import time +import uuid +from typing import List, Literal, Optional, Tuple + import numba import numpy as np +import numpy.typing as npt +import pyarrow as pa +import pyarrow.compute as pc +import quivr as qv +import ray +from adam_core.propagator import _iterate_chunks + +from .range_and_transform import TransformedDetections # Disable GPU until the GPU-accelerated clustering codes # are better tested and implemented @@ -12,6 +25,74 @@ from sklearn.cluster import DBSCAN +__all__ = [ + "cluster_and_link", + "Clusters", + "ClusterMembers", +] + +logger = logging.getLogger("thor") + + +class Clusters(qv.Table): + cluster_id = qv.StringColumn(default=lambda: uuid.uuid4().hex) + vtheta_x = qv.Float64Column() + vtheta_y = qv.Float64Column() + arc_length = qv.Float64Column() + num_obs = qv.Int64Column() + + def drop_duplicates( + self, cluster_members: "ClusterMembers" + ) -> Tuple["Clusters", "ClusterMembers"]: + """ + Drop clusters that have identical sets of observation IDs. + + Parameters + ---------- + cluster_members: `~thor.clusters.ClusterMembers` + A table of cluster members. + + Returns + ------- + `~thor.clusters.Clusters` + A table of clusters with duplicate clusters removed. 
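+        `~thor.clusters.ClusterMembers`
+            A table of cluster members, filtered to the clusters that were kept.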
+ """ + + # Sort by cluster_id and obs_id + sorted = self.sort_by(["cluster_id"]) + cluster_members = cluster_members.sort_by(["cluster_id", "obs_id"]) + + grouped_by_cluster_id = cluster_members.table.group_by( + ["cluster_id"] + ).aggregate([("obs_id", "distinct")]) + grouped_by_cluster_id = grouped_by_cluster_id.append_column( + "index", pa.array(np.arange(0, len(sorted))) + ) + + # We revert to pandas here because grouping by a list of observation IDs with + # pyarrow functions fails at the table creation stage during aggregation. + # This is likely a missing feature in pyarrow. The following code doesn't work: + # grouped_by_obs_lists = grouped_by_cluster_id.group_by( + # ["obs_id_distinct"], + # use_threads=False + # ).aggregate([("index", "first") + + df = grouped_by_cluster_id.to_pandas() + df["obs_id_distinct"] = df["obs_id_distinct"].apply(lambda x: x.tolist()) + indices = df.drop_duplicates(subset=["obs_id_distinct"])["index"].values + + filtered = sorted.take(indices) + filtered_cluster_members = cluster_members.apply_mask( + pc.is_in(cluster_members.cluster_id, filtered.cluster_id) + ) + return filtered, filtered_cluster_members + + +class ClusterMembers(qv.Table): + cluster_id = qv.StringColumn() + obs_id = qv.StringColumn() + + def find_clusters(points, eps, min_samples, alg="hotspot_2d"): """ Find all clusters in a 2-dimensional array of datapoints. @@ -85,6 +166,7 @@ def filter_clusters_by_length(clusters, dt, min_samples, min_arc_length): The original clusters list, filtered down. """ filtered_clusters = [] + arc_lengths = [] for cluster in clusters: dt_in_cluster = dt[cluster] num_obs = len(dt_in_cluster) @@ -95,7 +177,9 @@ def filter_clusters_by_length(clusters, dt, min_samples, min_arc_length): and (arc_length >= min_arc_length) ): filtered_clusters.append(cluster) - return filtered_clusters + arc_lengths.append(arc_length) + + return filtered_clusters, arc_lengths def _find_clusters_hotspots_2d(points, eps, min_samples): @@ -356,3 +440,335 @@ def _find_clusters_dbscan(points, eps, min_samples): clusters.append(cluster_indices) del db return clusters + + +def cluster_velocity( + obs_ids: npt.ArrayLike, + x: npt.NDArray[np.float64], + y: npt.NDArray[np.float64], + dt: npt.NDArray[np.float64], + vx: float, + vy: float, + radius: float = 0.005, + min_obs: int = 5, + min_arc_length: float = 1.0, + alg: Literal["hotspot_2d", "dbscan"] = "dbscan", +) -> Tuple[Clusters, ClusterMembers]: + """ + Clusters THOR projection with different velocities + in the projection plane using `~scipy.cluster.DBSCAN`. + Parameters + ---------- + obs_ids : `~numpy.ndarray' (N) + Observation IDs. + x : `~numpy.ndarray' (N) + Projection space x coordinate in degrees or radians. + y : `~numpy.ndarray' (N) + Projection space y coordinate in degrees or radians. + dt : `~numpy.ndarray' (N) + Change in time from 0th exposure in units of MJD. + vx : float + Projection space x velocity in units of degrees or radians per day in MJD. + vy : float + Projection space y velocity in units of degrees or radians per day in MJD. + radius : float, optional + The maximum distance between two samples for them to be considered + as in the same neighborhood. + See: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.dbscan.html + [Default = 0.005] + min_obs : int, optional + The number of samples (or total weight) in a neighborhood for a + point to be considered as a core point. This includes the point itself. 
+ See: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.dbscan.html + [Default = 5] + min_arc_length : float, optional + Minimum arc length in units of days for a cluster to be accepted. + + Returns + ------- + list + If clusters are found, will return a list of numpy arrays containing the + observation IDs for each cluster. If no clusters are found, will return np.NaN. + """ + logger.debug(f"cluster: vx={vx} vy={vy} n_obs={len(obs_ids)}") + xx = x - vx * dt + yy = y - vy * dt + + X = np.stack((xx, yy), 1) + + clusters = find_clusters(X, radius, min_obs, alg=alg) + clusters, arc_lengths = filter_clusters_by_length( + clusters, + dt, + min_obs, + min_arc_length, + ) + + if len(clusters) == 0: + return Clusters.empty(), ClusterMembers.empty() + else: + + cluster_ids = [] + cluster_num_obs = [] + cluster_members_cluster_ids = [] + cluster_members_obs_ids = [] + for cluster in clusters: + id = uuid.uuid4().hex + obs_ids_i = obs_ids[cluster] + num_obs = len(obs_ids_i) + + cluster_ids.append(id) + cluster_num_obs.append(num_obs) + cluster_members_cluster_ids.append(np.full(num_obs, id)) + cluster_members_obs_ids.append(obs_ids_i) + + clusters = Clusters.from_kwargs( + cluster_id=cluster_ids, + vtheta_x=np.full(len(cluster_ids), vx), + vtheta_y=np.full(len(cluster_ids), vy), + arc_length=arc_lengths, + num_obs=cluster_num_obs, + ) + + cluster_members = ClusterMembers.from_kwargs( + cluster_id=np.concatenate(cluster_members_cluster_ids), + obs_id=np.concatenate(cluster_members_obs_ids), + ) + + return clusters, cluster_members + + +def cluster_velocity_worker( + vx: npt.NDArray[np.float64], + vy: npt.NDArray[np.float64], + obs_ids: npt.ArrayLike, + x: npt.NDArray[np.float64], + y: npt.NDArray[np.float64], + dt: npt.NDArray[np.float64], + radius: float = 0.005, + min_obs: int = 5, + min_arc_length: float = 1.0, + alg: Literal["hotspot_2d", "dbscan"] = "dbscan", +) -> Tuple[Clusters, ClusterMembers]: + """ + Helper function for parallelizing cluster_velocity. This function takes a + batch or chunk of velocities and returns the clusters and cluster members + for that batch. + + """ + clusters_list = [] + cluster_members_list = [] + for vx_i, vy_i in zip(vx, vy): + clusters_i, cluster_members_i = cluster_velocity( + obs_ids, + x, + y, + dt, + vx_i, + vy_i, + radius=radius, + min_obs=min_obs, + min_arc_length=min_arc_length, + alg=alg, + ) + clusters_list.append(clusters_i) + cluster_members_list.append(cluster_members_i) + + return qv.concatenate(clusters_list), qv.concatenate(cluster_members_list) + + +cluster_velocity_remote = ray.remote(cluster_velocity_worker) +cluster_velocity_remote.options( + num_returns=1, + num_cpus=1, +) + + +def cluster_and_link( + observations: TransformedDetections, + vx_range: List[float] = [-0.1, 0.1], + vy_range: List[float] = [-0.1, 0.1], + vx_bins: int = 100, + vy_bins: int = 100, + radius: float = 0.005, + min_obs: int = 5, + min_arc_length: float = 1.0, + alg: Literal["hotspot_2d", "dbscan"] = "dbscan", + chunk_size: int = 1000, + max_processes: Optional[int] = 1, +) -> Tuple[Clusters, ClusterMembers]: + """ + Cluster and link correctly projected (after ranging and shifting) + detections. + + Parameters + ---------- + observations : `~pandas.DataFrame` + DataFrame containing post-range and shift observations. + vx_range : {None, list or `~numpy.ndarray` (2)} + Maximum and minimum velocity range in x. + [Default = [-0.1, 0.1]] + vy_range : {None, list or `~numpy.ndarray` (2)} + Maximum and minimum velocity range in y. 
+ [Default = [-0.1, 0.1]] + vx_bins : int, optional + Length of x-velocity grid between vx_range[0] + and vx_range[-1]. + [Default = 100] + vy_bins: int, optional + Length of y-velocity grid between vy_range[0] + and vy_range[-1]. + [Default = 100] + radius : float, optional + The maximum distance between two samples for them to be considered + as in the same neighborhood. + See: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.dbscan.html + [Default = 0.005] + min_obs : int, optional + The number of samples (or total weight) in a neighborhood for a + point to be considered as a core point. This includes the point itself. + See: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.dbscan.html + [Default = 5] + alg: str + Algorithm to use. Can be "dbscan" or "hotspot_2d". + num_jobs : int, optional + Number of jobs to launch. + parallel_backend : str, optional + Which parallelization backend to use {'ray', 'mp', 'cf'}. + Defaults to using Python's concurrent futures module ('cf'). + + Returns + ------- + clusters : `~pandas.DataFrame` + DataFrame with the cluster ID, the number of observations, and the x and y velocity. + cluster_members : `~pandas.DataFrame` + DataFrame containing the cluster ID and the observation IDs of its members. + + Notes + ----- + The algorithm chosen can have a big impact on performance and accuracy. + + alg="dbscan" uses the DBSCAN algorithm of Ester et. al. It's relatively slow + but works with high accuracy; it is certain to find all clusters with at + least min_obs points that are separated by at most radius. + + alg="hotspot_2d" is much faster (perhaps 10-20x faster) than dbscan, but it + may miss some clusters, particularly when points are spaced a distance of 'radius' + apart. + """ + time_start_cluster = time.time() + logger.info("Running velocity space clustering...") + + vx = np.linspace(*vx_range, num=vx_bins) + vy = np.linspace(*vy_range, num=vy_bins) + vxx, vyy = np.meshgrid(vx, vy) + vxx = vxx.flatten() + vyy = vyy.flatten() + + logger.debug("X velocity range: {}".format(vx_range)) + logger.debug("X velocity bins: {}".format(vx_bins)) + logger.debug("Y velocity range: {}".format(vy_range)) + logger.debug("Y velocity bins: {}".format(vy_bins)) + logger.debug("Velocity grid size: {}".format(vx_bins)) + logger.info("Max sample distance: {}".format(radius)) + logger.info("Minimum samples: {}".format(min_obs)) + + clusters_list = [] + cluster_members_list = [] + if len(observations) > 0: + # Extract useful quantities + obs_ids = observations.id.to_numpy(zero_copy_only=False) + theta_x = observations.coordinates.theta_x.to_numpy(zero_copy_only=False) + theta_y = observations.coordinates.theta_y.to_numpy(zero_copy_only=False) + mjd = observations.coordinates.time.mjd().to_numpy(zero_copy_only=False) + + # Select detections in first exposure + first = np.where(mjd == mjd.min())[0] + mjd0 = mjd[first][0] + dt = mjd - mjd0 + + if max_processes is None or max_processes > 1: + + if not ray.is_initialized(): + ray.init(address="auto") + + # Put all arrays (which can be large) in ray's + # local object store ahead of time + obs_ids_oid = ray.put(obs_ids) + theta_x_oid = ray.put(theta_x) + theta_y_oid = ray.put(theta_y) + dt_oid = ray.put(dt) + + futures = [] + for vxi_chunk, vyi_chunk in zip( + _iterate_chunks(vxx, chunk_size), _iterate_chunks(vyy, chunk_size) + ): + futures.append( + cluster_velocity_remote.remote( + vxi_chunk, + vyi_chunk, + obs_ids_oid, + theta_x_oid, + theta_y_oid, + dt_oid, + radius=radius, + min_obs=min_obs, + 
min_arc_length=min_arc_length, + alg=alg, + ) + ) + + while futures: + finished, futures = ray.wait(futures, num_returns=1) + result = ray.get(finished[0]) + clusters_list.append(result[0]) + cluster_members_list.append(result[1]) + + else: + + for vxi_chunk, vyi_chunk in zip( + _iterate_chunks(vxx, chunk_size), _iterate_chunks(vyy, chunk_size) + ): + clusters_i, cluster_members_i = cluster_velocity_worker( + vxi_chunk, + vyi_chunk, + obs_ids, + theta_x, + theta_y, + dt, + radius=radius, + min_obs=min_obs, + min_arc_length=min_arc_length, + alg=alg, + ) + clusters_list.append(clusters_i) + cluster_members_list.append(cluster_members_i) + + clusters = qv.concatenate(clusters_list) + cluster_members = qv.concatenate(cluster_members_list) + + # Drop duplicate clusters + time_start_drop = time.time() + logger.info("Removing duplicate clusters...") + num_clusters = len(clusters) + clusters, cluster_members = clusters.drop_duplicates(cluster_members) + logger.info(f"Removed {num_clusters - len(clusters)} duplicate clusters.") + time_end_drop = time.time() + logger.info( + f"Cluster deduplication completed in {time_end_drop - time_start_drop:.3f} seconds." + ) + + else: + + clusters = Clusters.empty() + cluster_members = ClusterMembers.empty() + + time_end_cluster = time.time() + logger.info("Found {} clusters.".format(len(clusters))) + logger.info( + "Clustering completed in {:.3f} seconds.".format( + time_end_cluster - time_start_cluster + ) + ) + + return clusters, cluster_members diff --git a/thor/config.py b/thor/config.py index 27b783fa..6524bc16 100644 --- a/thor/config.py +++ b/thor/config.py @@ -21,6 +21,7 @@ class Config: cluster_min_obs: int = 6 cluster_min_arc_length: float = 1.0 cluster_algorithm: Literal["hotspot_2d", "dbscan"] = "dbscan" + cluster_chunk_size: int = 1000 iod_min_obs: int = 6 iod_min_arc_length: float = 1.0 iod_contamination_percentage: float = 20.0 diff --git a/thor/constants.py b/thor/constants.py deleted file mode 100644 index ef6e107a..00000000 --- a/thor/constants.py +++ /dev/null @@ -1,60 +0,0 @@ -import numpy as np - -__all__ = [ - "KM_P_AU", - "S_P_DAY", - "Constants", - "DE43X", - "DE44X", -] - -# km in an au -KM_P_AU = 149597870.700 -# seconds in a day -S_P_DAY = 86400.0 - - -class _Constants: - def __init__(self, C=None, MU=None, R_Earth=None, Obliquity=None): - self.C = C - self.MU = MU - self.R_EARTH = R_Earth - self.OBLIQUITY = Obliquity - - # Transformation matrix from Equatorial J2000 to Ecliptic J2000 - self.TRANSFORM_EQ2EC = np.array( - [ - [1, 0, 0], - [0, np.cos(self.OBLIQUITY), np.sin(self.OBLIQUITY)], - [0, -np.sin(self.OBLIQUITY), np.cos(self.OBLIQUITY)], - ] - ) - - # Transformation matrix from Ecliptic J2000 to Equatorial J2000 - self.TRANSFORM_EC2EQ = self.TRANSFORM_EQ2EC.T - return - - -DE43X_CONSTANTS = { - # Speed of Light : au / d (299792.458 km / s -- DE430/DE431) - "C": 299792.458 / KM_P_AU * S_P_DAY, - # Standard Gravitational Parameter -- Sun : au**3 / d**2 (0.295912208285591100E-3 -- DE431/DE430) - "MU": 0.295912208285591100e-3, - # Earth Equatorial Radius: au (6378.1363 km -- DE431/DE430) - "R_Earth": 6378.1363 / KM_P_AU, - # Mean Obliquity at J2000: radians (84381.448 arcseconds -- DE431/DE430) - "Obliquity": 84381.448 * np.pi / (180.0 * 3600.0), -} -Constants = DE43X = _Constants(**DE43X_CONSTANTS) - -DE44X_CONSTANTS = { - # Speed of Light : au / d (299792.458 km / s -- DE430/DE431) - "C": 299792.458 / KM_P_AU * S_P_DAY, - # Standard Gravitational Parameter -- Sun : au**3 / d**2 (0.29591220828411956E-03 -- DE441/DE440) - "MU": 
0.29591220828411956e-03, - # Earth Equatorial Radius: au (6378.1363 km -- DE431/DE430) - "R_Earth": 6378.1363 / KM_P_AU, - # Mean Obliquity at J2000: radians (84381.448 arcseconds -- DE431/DE430) - "Obliquity": 84381.448 * np.pi / (180.0 * 3600.0), -} -DE44X = _Constants(**DE44X_CONSTANTS) diff --git a/thor/filter_orbits.py b/thor/filter_orbits.py deleted file mode 100644 index 16f453db..00000000 --- a/thor/filter_orbits.py +++ /dev/null @@ -1,188 +0,0 @@ -from typing import Tuple - -import pandas as pd - -from .utils import calcDeltas - -__all__ = ["filterKnownOrbits", "filterOrbits"] - - -def filterKnownOrbits( - orbits: pd.DataFrame, - orbit_observations: pd.DataFrame, - associations: pd.DataFrame, - min_obs: int = 5, -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Remove all observations of unknown objects, keeping only observations of objects with - a known association. If any orbits have fewer than min_obs observations after removing - unknown observations then remove those orbits as well. - - This function will also set the provisional and permanent designation columns as required - by the ADES file format. - - Parameters - ---------- - orbits : `~pandas.DataFrame` - DataFrame of orbits. - orbit_observations : `~pandas.DataFrame` - Dataframe of orbit observations with at least one column with the orbit ID ('orbit_id') and - one column with the 'obs_id' - associations : `~pandas.DataFrame` - DataFrame of known associations, with one column of containing the observation ID ('obs_id') - and another column containing the association ('obj_id'). Any unknown objects should have - been assigned an unknown ID. See preprocessObservations. - min_obs : int - The minimum number of observations for an object to be considered as recovered. - - Returns - ------- - known_orbits : `~pandas.DataFrame` - Orbits of previously known objects. - known_orbit_observations : `~pandas.DataFrame` - Observations of previously known objects, the constituent observations - to which the orbits were fit. 
- """ - # Merge associations with orbit observations - labeled_observations = orbit_observations.merge( - associations[["obs_id", "obj_id"]], on="obs_id", how="left" - ) - - # Keep only observations of known objects - labeled_observations = labeled_observations[ - ~labeled_observations["obj_id"].str.contains(UNKNOWN_ID_REGEX, regex=True) - ] - - # Keep only known objects with at least min_obs observations - occurences = labeled_observations["orbit_id"].value_counts() - orbit_ids = occurences.index.values[occurences.values >= min_obs] - - # Filter input orbits - orbits_mask = orbits["orbit_id"].isin(orbit_ids) - orbit_observations_mask = labeled_observations["orbit_id"].isin(orbit_ids) - known_orbits = orbits[orbits_mask].copy() - known_orbit_observations = labeled_observations[orbit_observations_mask].copy() - - # Split into permanent and provisional designations - if len(known_orbit_observations) > 0: - known_orbit_observations.loc[:, "permID"] = "" - known_orbit_observations.loc[:, "provID"] = "" - else: - known_orbit_observations["permID"] = "" - known_orbit_observations["provID"] = "" - - # Process permanent IDs first - # TODO : add an equivalent for Comets - perm_ids = known_orbit_observations["obj_id"].str.isnumeric() - known_orbit_observations.loc[perm_ids, "permID"] = known_orbit_observations[ - perm_ids - ]["obj_id"].values - - # Identify provisional IDs next - prov_ids = (~known_orbit_observations["obj_id"].str.isnumeric()) & ( - ~known_orbit_observations["obj_id"].str.contains(UNKNOWN_ID_REGEX, regex=True) - ) - known_orbit_observations.loc[prov_ids, "provID"] = known_orbit_observations[ - prov_ids - ]["obj_id"].values - - # Reorder the columns to put the labels toward the front - cols = known_orbit_observations.columns - first = ["orbit_id", "permID", "provID", "obj_id", "obs_id"] - cols_ordered = first + cols[~cols.isin(first)].tolist() - known_orbit_observations = known_orbit_observations[cols_ordered] - - return known_orbits, known_orbit_observations - - -def filterOrbits( - orbits: pd.DataFrame, - orbit_observations: pd.DataFrame, - associations: pd.DataFrame, - min_obs: int = 5, - min_time_separation: float = 30.0, - delta_cols: list = ["mjd_utc", "mag", "RA_deg", "Dec_deg"], -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Filter orbits into orbits of previously known objects and potential discovery candidates. - - Parameters - ---------- - orbits : `~pandas.DataFrame` - DataFrame of orbits. - orbit_observations : `~pandas.DataFrame` - Dataframe of orbit observations with at least one column with the orbit ID ('orbit_id') and - one column with the 'obs_id' - associations : `~pandas.DataFrame` - DataFrame of known associations, with one column of containing the observation ID ('obs_id') - and another column containing the association ('obj_id'). Any unknown objects should have - been assigned an unknown ID. See preprocessObservations. - min_obs : int - The minimum number of observations for an object to be considered as recovered. - min_time_separation : int - The minimum time two observations should be separated in minutes. If any observations - for a single orbit are seperated by less than this amount then only the first observation is kept. - This is useful to prevent stationary sources from biasing orbit fits, although may decrease overall - completeness. - delta_cols : list[str] - Columns for which to calculate deltas (must include mjd_utc). 
- - Returns - ------- - discovery_candidates : (`~pandas.DataFrame`, `~pandas.DataFrame`) - DataFrame of dicovery candidate orbits and discovery candidate observations. - known_orbits : (`~pandas.DataFrame`, `~pandas.DataFrame`) - DataFrame of known orbits and known orbit observations. - """ - # Calculate deltas of a variety of quantities (this returns the orbit_observations dataframe - # with the delta columns added) - deltas = calcDeltas( - orbit_observations, groupby_cols=["orbit_id", "night_id"], delta_cols=delta_cols - ) - - # Mark all observations within min_time of another as filtered - deltas.loc[:, "filtered"] = 1 - deltas.loc[ - (deltas["dmjd_utc"].isna()) - | (deltas["dmjd_utc"] >= min_time_separation / 60 / 24), - "filtered", - ] = 0 - orbits_ = orbits.copy() - orbit_observations_ = deltas.copy() - - # Identify known orbits (also remove any observations of unknown objects from these orbits) - recovered_known_orbits, recovered_known_orbit_observations = filterKnownOrbits( - orbits_, orbit_observations_, associations, min_obs=min_obs - ) - - # Remove the known orbits from the pool of orbits - # The remaining orbits are potential candidates - known_orbit_ids = recovered_known_orbits["orbit_id"].values - candidate_orbits = orbits_[~orbits_["orbit_id"].isin(known_orbit_ids)] - candidate_orbit_observations = orbit_observations_[ - ~orbit_observations_["orbit_id"].isin(known_orbit_ids) - ] - - # Remove any observations of the candidate discoveries that are potentially - # too close in time to eachother (removes stationary source that could bias results) - # Any orbits that now have fewer than min_obs observations are also removed - candidate_orbit_observations = candidate_orbit_observations[ - candidate_orbit_observations["filtered"] == 0 - ] - occurences = candidate_orbit_observations["orbit_id"].value_counts() - orbit_ids = occurences.index.values[occurences.values >= min_obs] - candidate_orbits = orbits[orbits["orbit_id"].isin(orbit_ids)] - candidate_orbit_observations = candidate_orbit_observations[ - candidate_orbit_observations["orbit_id"].isin(orbit_ids) - ] - - # Add a trkSub column to the discovery candidates - trk_subs = [ - f"t{i[0:4]}{i[-3:]}" for i in candidate_orbit_observations["orbit_id"].values - ] - candidate_orbit_observations.insert(1, "trkSub", trk_subs) - - discovery_candidates = (candidate_orbits, candidate_orbit_observations) - known_orbits = (recovered_known_orbits, recovered_known_orbit_observations) - - return discovery_candidates, known_orbits diff --git a/thor/main.py b/thor/main.py index 31e351b7..1083cac8 100644 --- a/thor/main.py +++ b/thor/main.py @@ -1,443 +1,193 @@ -import os - -os.environ["OMP_NUM_THREADS"] = "1" -os.environ["OPENBLAS_NUM_THREADS"] = "1" -os.environ["MKL_NUM_THREADS"] = "1" -os.environ["VECLIB_MAXIMUM_THREADS"] = "1" -os.environ["NUMEXPR_NUM_THREADS"] = "1" - -import concurrent.futures as cf import logging -import multiprocessing as mp import time -import uuid -from functools import partial - -import numpy as np -import pandas as pd - -from .clusters import filter_clusters_by_length, find_clusters -from .utils import _checkParallel, _initWorker +from typing import Any, Iterator, List, Optional + +import quivr as qv +import ray +from adam_core.propagator import PYOORB + +from .clusters import cluster_and_link +from .config import Config +from .observations.filters import ObservationFilter, TestOrbitRadiusObservationFilter +from .observations.observations import Observations +from .orbit import TestOrbit +from .orbits import ( + 
differential_correction, + initial_orbit_determination, + merge_and_extend_orbits, +) +from .range_and_transform import range_and_transform logger = logging.getLogger("thor") -__all__ = [ - "clusterVelocity", - "clusterVelocity_worker", - "clusterAndLink", -] - - - -def clusterVelocity( - obs_ids, - x, - y, - dt, - vx, - vy, - eps=0.005, - min_obs=5, - min_arc_length=1.0, - alg="hotspot_2d", -): - """ - Clusters THOR projection with different velocities - in the projection plane using `~scipy.cluster.DBSCAN`. - Parameters - ---------- - obs_ids : `~numpy.ndarray' (N) - Observation IDs. - x : `~numpy.ndarray' (N) - Projection space x coordinate in degrees or radians. - y : `~numpy.ndarray' (N) - Projection space y coordinate in degrees or radians. - dt : `~numpy.ndarray' (N) - Change in time from 0th exposure in units of MJD. - vx : `~numpy.ndarray' (N) - Projection space x velocity in units of degrees or radians per day in MJD. - vy : `~numpy.ndarray' (N) - Projection space y velocity in units of degrees or radians per day in MJD. - eps : float, optional - The maximum distance between two samples for them to be considered - as in the same neighborhood. - See: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.dbscan.html - [Default = 0.005] - min_obs : int, optional - The number of samples (or total weight) in a neighborhood for a - point to be considered as a core point. This includes the point itself. - See: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.dbscan.html - [Default = 5] - min_arc_length : float, optional - Minimum arc length in units of days for a cluster to be accepted. - - Returns - ------- - list - If clusters are found, will return a list of numpy arrays containing the - observation IDs for each cluster. If no clusters are found, will return np.NaN. - """ - logger.debug(f"cluster: vx={vx} vy={vy} n_obs={len(obs_ids)}") - xx = x - vx * dt - yy = y - vy * dt - - X = np.stack((xx, yy), 1) - - clusters = find_clusters(X, eps, min_obs, alg=alg) - clusters = filter_clusters_by_length( - clusters, - dt, - min_obs, - min_arc_length, - ) - - cluster_ids = [] - for cluster in clusters: - cluster_ids.append(obs_ids[cluster]) - - if len(cluster_ids) == 0: - cluster_ids = np.NaN - return cluster_ids - - -def clusterVelocity_worker( - vx, - vy, - obs_ids=None, - x=None, - y=None, - dt=None, - eps=None, - min_obs=None, - min_arc_length=None, - alg=None, -): - """ - Helper function to multiprocess clustering. - - """ - cluster_ids = clusterVelocity( - obs_ids, - x, - y, - dt, - vx, - vy, - eps=eps, - min_obs=min_obs, - min_arc_length=min_arc_length, - alg=alg, - ) - return cluster_ids - - - -def clusterAndLink( - observations, - vx_range=[-0.1, 0.1], - vy_range=[-0.1, 0.1], - vx_bins=100, - vy_bins=100, - eps=0.005, - min_obs=5, - min_arc_length=1.0, - alg="dbscan", - num_jobs=1, - parallel_backend="cf", -): +def link_test_orbit( + test_orbit: TestOrbit, + observations: Observations, + filters: Optional[List[ObservationFilter]] = None, + config: Optional[Config] = None, +) -> Iterator[Any]: """ - Cluster and link correctly projected (after ranging and shifting) - detections. + Run THOR for a single test orbit on the given observations. This function will yield + results at each stage of the pipeline. + 1. transformed_detections + 2. clusters, cluster_members + 3. iod_orbits, iod_orbit_members + 4. od_orbits, od_orbit_members + 5. 
recovered_orbits, recovered_orbit_members Parameters ---------- - observations : `~pandas.DataFrame` - DataFrame containing post-range and shift observations. - vx_range : {None, list or `~numpy.ndarray` (2)} - Maximum and minimum velocity range in x. - [Default = [-0.1, 0.1]] - vy_range : {None, list or `~numpy.ndarray` (2)} - Maximum and minimum velocity range in y. - [Default = [-0.1, 0.1]] - vx_bins : int, optional - Length of x-velocity grid between vx_range[0] - and vx_range[-1]. - [Default = 100] - vy_bins: int, optional - Length of y-velocity grid between vy_range[0] - and vy_range[-1]. - [Default = 100] - eps : float, optional - The maximum distance between two samples for them to be considered - as in the same neighborhood. - See: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.dbscan.html - [Default = 0.005] - min_obs : int, optional - The number of samples (or total weight) in a neighborhood for a - point to be considered as a core point. This includes the point itself. - See: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.dbscan.html - [Default = 5] - alg: str - Algorithm to use. Can be "dbscan" or "hotspot_2d". - num_jobs : int, optional - Number of jobs to launch. - parallel_backend : str, optional - Which parallelization backend to use {'ray', 'mp', 'cf'}. - Defaults to using Python's concurrent futures module ('cf'). - - Returns - ------- - clusters : `~pandas.DataFrame` - DataFrame with the cluster ID, the number of observations, and the x and y velocity. - cluster_members : `~pandas.DataFrame` - DataFrame containing the cluster ID and the observation IDs of its members. - - Notes - ----- - The algorithm chosen can have a big impact on performance and accuracy. - - alg="dbscan" uses the DBSCAN algorithm of Ester et. al. It's relatively slow - but works with high accuracy; it is certain to find all clusters with at - least min_obs points that are separated by at most eps. - - alg="hotspot_2d" is much faster (perhaps 10-20x faster) than dbscan, but it - may miss some clusters, particularly when points are spaced a distance of 'eps' - apart. + test_orbit : `~thor.orbit.TestOrbit` + Test orbit to use to gather and transform observations. + observations : `~thor.observations.observations.Observations` + Observations from which range and transform the detections. + filters : list of `~thor.observations.filters.ObservationFilter`, optional + List of filters to apply to the observations before running THOR. + propagator : `~adam_core.propagator.propagator.Propagator` + Propagator to use to propagate the test orbit and generate + ephemerides. + max_processes : int, optional + Maximum number of processes to use for parallelization. 
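+    config : `~thor.config.Config`, optional
+        Configuration for the clustering, initial orbit determination,
+        differential correction, and arc extension stages. If None, a
+        default Config is used.
+
+    Examples
+    --------
+    A minimal sketch of how the generator is typically consumed (the
+    `test_orbit` and `observations` names here are illustrative):
+
+    >>> for stage_result in link_test_orbit(test_orbit, observations):
+    ...     pass  # one result per stage, ending with (recovered_orbits, recovered_orbit_members)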
""" - time_start_cluster = time.time() - logger.info("Running velocity space clustering...") - - vx = np.linspace(*vx_range, num=vx_bins) - vy = np.linspace(*vy_range, num=vy_bins) - vxx, vyy = np.meshgrid(vx, vy) - vxx = vxx.flatten() - vyy = vyy.flatten() - - logger.debug("X velocity range: {}".format(vx_range)) - logger.debug("X velocity bins: {}".format(vx_bins)) - logger.debug("Y velocity range: {}".format(vy_range)) - logger.debug("Y velocity bins: {}".format(vy_bins)) - logger.debug("Velocity grid size: {}".format(vx_bins)) - logger.info("Max sample distance: {}".format(eps)) - logger.info("Minimum samples: {}".format(min_obs)) - - possible_clusters = [] - if len(observations) > 0: - # Extract useful quantities - obs_ids = observations["obs_id"].values - theta_x = observations["theta_x_deg"].values - theta_y = observations["theta_y_deg"].values - mjd = observations["mjd_utc"].values - - # Select detections in first exposure - first = np.where(mjd == mjd.min())[0] - mjd0 = mjd[first][0] - dt = mjd - mjd0 + time_start = time.perf_counter() + logger.info("Running test orbit...") - parallel, num_workers = _checkParallel(num_jobs, parallel_backend) - if parallel: - if parallel_backend == "ray": - import ray + if config is None: + config = Config() - if not ray.is_initialized(): - ray.init(address="auto") - - clusterVelocity_worker_ray = ray.remote(clusterVelocity_worker) - clusterVelocity_worker_ray = clusterVelocity_worker_ray.options( - num_returns=1, num_cpus=1 - ) - - # Put all arrays (which can be large) in ray's - # local object store ahead of time - obs_ids_oid = ray.put(obs_ids) - theta_x_oid = ray.put(theta_x) - theta_y_oid = ray.put(theta_y) - dt_oid = ray.put(dt) - - p = [] - for vxi, vyi in zip(vxx, vyy): - p.append( - clusterVelocity_worker_ray.remote( - vxi, - vyi, - obs_ids=obs_ids_oid, - x=theta_x_oid, - y=theta_y_oid, - dt=dt_oid, - eps=eps, - min_obs=min_obs, - min_arc_length=min_arc_length, - alg=alg, - ) - ) - possible_clusters = ray.get(p) - - elif parallel_backend == "mp": - p = mp.Pool(processes=num_workers, initializer=_initWorker) - possible_clusters = p.starmap( - partial( - clusterVelocity_worker, - obs_ids=obs_ids, - x=theta_x, - y=theta_y, - dt=dt, - eps=eps, - min_obs=min_obs, - min_arc_length=min_arc_length, - alg=alg, - ), - zip(vxx, vyy), - ) - p.close() - - elif parallel_backend == "cf": - with cf.ProcessPoolExecutor( - max_workers=num_workers, initializer=_initWorker - ) as executor: - futures = [] - for vxi, vyi in zip(vxx, vyy): - f = executor.submit( - clusterVelocity_worker, - vxi, - vyi, - obs_ids=obs_ids, - x=theta_x, - y=theta_y, - dt=dt, - eps=eps, - min_obs=min_obs, - min_arc_length=min_arc_length, - alg=alg, - ) - futures.append(f) - - possible_clusters = [] - for f in cf.as_completed(futures): - possible_clusters.append(f.result()) - - else: - raise ValueError( - "Invalid parallel_backend: {}".format(parallel_backend) - ) - - else: - possible_clusters = [] - for vxi, vyi in zip(vxx, vyy): - possible_clusters.append( - clusterVelocity( - obs_ids, - theta_x, - theta_y, - dt, - vxi, - vyi, - eps=eps, - min_obs=min_obs, - min_arc_length=min_arc_length, - alg=alg, - ) - ) - - time_end_cluster = time.time() - logger.info( - "Clustering completed in {:.3f} seconds.".format( - time_end_cluster - time_start_cluster - ) - ) - - logger.info("Restructuring clusters...") - time_start_restr = time.time() - - possible_clusters = pd.DataFrame({"clusters": possible_clusters}) - - # Remove empty clusters - possible_clusters = 
possible_clusters[~possible_clusters["clusters"].isna()] - - if len(possible_clusters) != 0: - ### The following code is a little messy, its a lot of pandas dataframe manipulation. - ### I have tried doing an overhaul wherein the clusters and cluster_members dataframe are created per - ### velocity combination in the clusterVelocity function. However, this adds an overhead in that function - ### of ~ 1ms. So clustering 90,000 velocities takes 90 seconds longer which on small datasets is problematic. - ### On large datasets, the effect is not as pronounced because the below code takes a while to run due to - ### in-memory pandas dataframe restructuring. - - # Make DataFrame with cluster velocities so we can figure out which - # velocities yielded clusters, add names to index so we can enable the join - cluster_velocities = pd.DataFrame({"vtheta_x": vxx, "vtheta_y": vyy}) - cluster_velocities.index.set_names("velocity_id", inplace=True) - - # Split lists of cluster ids into one column per cluster for each different velocity - # then stack the result - possible_clusters = pd.DataFrame( - possible_clusters["clusters"].values.tolist(), index=possible_clusters.index - ) - possible_clusters = pd.DataFrame(possible_clusters.stack()) - possible_clusters.rename(columns={0: "obs_ids"}, inplace=True) - possible_clusters = pd.DataFrame( - possible_clusters["obs_ids"].values.tolist(), index=possible_clusters.index + if config.propagator == "PYOORB": + propagator = PYOORB + else: + raise ValueError(f"Unknown propagator: {config.propagator}") + + use_ray = False + if config.max_processes is None or config.max_processes > 1: + # Initialize ray + if not ray.is_initialized(): + logger.debug( + f"Ray is not initialized. Initializing with {config.max_processes}..." + ) + ray.init(num_cpus=config.max_processes) + + if not isinstance(observations, ray.ObjectRef): + observations = ray.put(observations) + + use_ray = True + + # Apply filters to the observations + filtered_observations = observations + if filters is None: + # By default we always filter by radius from the predicted position of the test orbit + filters = [TestOrbitRadiusObservationFilter(radius=config.cell_radius)] + for filter_i in filters: + filtered_observations = filter_i.apply( + filtered_observations, test_orbit, max_processes=config.max_processes ) - # Drop duplicate clusters - possible_clusters.drop_duplicates(inplace=True) - - # Set index names - possible_clusters.index.set_names(["velocity_id", "cluster_id"], inplace=True) - - # Reset index - possible_clusters.reset_index("cluster_id", drop=True, inplace=True) - possible_clusters["cluster_id"] = [ - str(uuid.uuid4().hex) for i in range(len(possible_clusters)) - ] + # Defragment the observations + if len(filtered_observations) > 0: + filtered_observations = qv.defragment(filtered_observations) - # Make clusters DataFrame - clusters = possible_clusters.join(cluster_velocities) - clusters.reset_index(drop=True, inplace=True) - clusters = clusters[["cluster_id", "vtheta_x", "vtheta_y"]] - - # Make cluster_members DataFrame - cluster_members = possible_clusters.reset_index(drop=True).copy() - cluster_members.index = cluster_members["cluster_id"] - cluster_members.drop("cluster_id", axis=1, inplace=True) - cluster_members = pd.DataFrame(cluster_members.stack()) - cluster_members.rename(columns={0: "obs_id"}, inplace=True) - cluster_members.reset_index(inplace=True) - cluster_members.drop("level_1", axis=1, inplace=True) - - # Calculate arc length and add it to the clusters dataframe - 
cluster_members_time = cluster_members.merge( - observations[["obs_id", "mjd_utc"]], on="obs_id", how="left" - ) - clusters_time = ( - cluster_members_time.groupby(by=["cluster_id"])["mjd_utc"] - .apply(lambda x: x.max() - x.min()) - .to_frame() - ) - clusters_time.reset_index(inplace=True) - clusters_time.rename(columns={"mjd_utc": "arc_length"}, inplace=True) - clusters = clusters.merge( - clusters_time[["cluster_id", "arc_length"]], - on="cluster_id", - how="left", - ) + # Observations are no longer needed, so we can delete them + del observations - else: - cluster_members = pd.DataFrame(columns=["cluster_id", "obs_id"]) - clusters = pd.DataFrame( - columns=["cluster_id", "vtheta_x", "vtheta_y", "arc_length"] - ) + if use_ray: + filtered_observations = ray.put(filtered_observations) - time_end_restr = time.time() - logger.info( - "Restructuring completed in {:.3f} seconds.".format( - time_end_restr - time_start_restr - ) + # Range and transform the observations + transformed_detections = range_and_transform( + test_orbit, + filtered_observations, + propagator=propagator, + max_processes=config.max_processes, ) - logger.info("Found {} clusters.".format(len(clusters))) - logger.info( - "Clustering and restructuring completed in {:.3f} seconds.".format( - time_end_restr - time_start_cluster - ) + yield transformed_detections + + # TODO: ray support for the rest of the pipeline has not yet been implemented + # so we will convert the ray objects to regular objects for now + if use_ray: + filtered_observations = ray.get(filtered_observations) + + # Run clustering + clusters, cluster_members = cluster_and_link( + transformed_detections, + vx_range=[config.vx_min, config.vx_max], + vy_range=[config.vy_min, config.vy_max], + vx_bins=config.vx_bins, + vy_bins=config.vy_bins, + radius=config.cluster_radius, + min_obs=config.cluster_min_obs, + min_arc_length=config.cluster_min_arc_length, + alg=config.cluster_algorithm, + chunk_size=config.cluster_chunk_size, + max_processes=config.max_processes, + ) + yield clusters, cluster_members + + # Run initial orbit determination + iod_orbits, iod_orbit_members = initial_orbit_determination( + filtered_observations, + cluster_members, + min_obs=config.iod_min_obs, + min_arc_length=config.iod_min_arc_length, + contamination_percentage=config.iod_contamination_percentage, + rchi2_threshold=config.iod_rchi2_threshold, + observation_selection_method=config.iod_observation_selection_method, + propagator=propagator, + propagator_kwargs={}, + chunk_size=config.iod_chunk_size, + max_processes=config.max_processes, + # TODO: investigate whether these should be configurable + iterate=False, + light_time=True, + linkage_id_col="cluster_id", + ) + yield iod_orbits, iod_orbit_members + + # Run differential correction + od_orbits, od_orbit_members = differential_correction( + iod_orbits, + iod_orbit_members, + filtered_observations, + min_obs=config.od_min_obs, + min_arc_length=config.od_min_arc_length, + contamination_percentage=config.od_contamination_percentage, + rchi2_threshold=config.od_rchi2_threshold, + delta=config.od_delta, + max_iter=config.od_max_iter, + propagator=config.propagator, + propagator_kwargs={}, + chunk_size=config.od_chunk_size, + max_processes=config.max_processes, + # TODO: investigate whether these should be configurable + method="central", + fit_epoch=False, + ) + yield od_orbits, od_orbit_members + + # Run arc extension + recovered_orbits, recovered_orbit_members = merge_and_extend_orbits( + od_orbits, + od_orbit_members, + 
filtered_observations, + min_obs=config.arc_extension_min_obs, + min_arc_length=config.arc_extension_min_arc_length, + contamination_percentage=config.arc_extension_contamination_percentage, + rchi2_threshold=config.arc_extension_rchi2_threshold, + radius=config.arc_extension_radius, + delta=config.od_delta, + max_iter=config.od_max_iter, + propagator=config.propagator, + propagator_kwargs={}, + orbits_chunk_size=config.arc_extension_chunk_size, + max_processes=config.max_processes, + # TODO: investigate whether these should be configurable + method="central", + fit_epoch=False, + observations_chunk_size=100000, ) + yield recovered_orbits, recovered_orbit_members - return clusters, cluster_members + time_end = time.perf_counter() + logger.info(f"Test orbit completed in {time_end-time_start:.3f} seconds.") diff --git a/thor/main_2.py b/thor/main_2.py deleted file mode 100644 index fd39b72c..00000000 --- a/thor/main_2.py +++ /dev/null @@ -1,506 +0,0 @@ -import logging -import time -from typing import Any, Iterator, List, Optional, Union - -import pandas as pd -import pyarrow.compute as pc -import quivr as qv -import ray -from adam_core.coordinates import ( - CartesianCoordinates, - OriginCodes, - transform_coordinates, -) -from adam_core.propagator import PYOORB, Propagator - -from .config import Config -from .main import clusterAndLink -from .observations.filters import ObservationFilter, TestOrbitRadiusObservationFilter -from .observations.observations import Observations, ObserversWithStates -from .orbit import TestOrbit, TestOrbitEphemeris -from .orbits import ( - differentialCorrection, - initialOrbitDetermination, - mergeAndExtendOrbits, -) -from .projections import GnomonicCoordinates - -logger = logging.getLogger(__name__) - - -class TransformedDetections(qv.Table): - id = qv.StringColumn() - coordinates = GnomonicCoordinates.as_column() - state_id = qv.Int64Column() - - -def range_and_transform_worker( - ranged_detections: CartesianCoordinates, - observations: Observations, - ephemeris: TestOrbitEphemeris, - state_id: int, -) -> TransformedDetections: - """ - Given ranged detections and their original observations, transform these to the gnomonic tangent - plane centered on the motion of the test orbit for a single state. - - Parameters - ---------- - ranged_detections - Cartesian detections ranged so that their heliocentric distance is the same as the test orbit - for each state - observations - The observations from which the ranged detections originate. These should be sorted one-to-one - with the ranged detections - ephemeris - Ephemeris from which to extract the test orbit's aberrated state. - state_id - The ID for this particular state. - - Returns - ------- - transformed_detections - Detections transformed to a gnomonic tangent plane centered on the motion of the - test orbit. 
- """ - # Select the detections and ephemeris for this state id - mask = pc.equal(state_id, observations.state_id) - ranged_detections_state = ranged_detections.apply_mask(mask) - ephemeris_state = ephemeris.select("id", state_id) - observations_state = observations.select("state_id", state_id) - - # Transform the detections into the co-rotating frame - return TransformedDetections.from_kwargs( - id=observations_state.detections.id, - coordinates=GnomonicCoordinates.from_cartesian( - ranged_detections_state, - center_cartesian=ephemeris_state.ephemeris.aberrated_coordinates, - ), - state_id=observations_state.state_id, - ) - - -range_and_transform_remote = ray.remote(range_and_transform_worker) - - -def range_and_transform( - test_orbit: TestOrbit, - observations: Union[Observations, ray.ObjectRef], - propagator: Propagator = PYOORB(), - max_processes: Optional[int] = 1, -) -> TransformedDetections: - """ - Range observations for a single test orbit and transform them into a - gnomonic projection centered on the motion of the test orbit (co-rotating - frame). - - Parameters - ---------- - test_orbit : `~thor.orbit.TestOrbit` - Test orbit to use to gather and transform observations. - observations : `~thor.observations.observations.Observations` - Observations from which range and transform the detections. - propagator : `~adam_core.propagator.propagator.Propagator` - Propagator to use to propagate the test orbit and generate - ephemerides. - max_processes : int, optional - Maximum number of processes to use for parallelization. If - an existing ray cluster is already running, this parameter - will be ignored if larger than 1 or not None. - - Returns - ------- - transformed_detections : `~thor.main.TransformedDetections` - The transformed detections as gnomonic coordinates - of the observations in the co-rotating frame. - """ - time_start = time.perf_counter() - logger.info("Running range and transform...") - logger.info(f"Assuming r = {test_orbit.orbit.coordinates.r[0]} au") - logger.info(f"Assuming v = {test_orbit.orbit.coordinates.v[0]} au/d") - - if isinstance(observations, ray.ObjectRef): - observations = ray.get(observations) - - if len(observations) > 0: - # Compute the ephemeris of the test orbit (this will be cached) - ephemeris = test_orbit.generate_ephemeris_from_observations( - observations, - propagator=propagator, - max_processes=max_processes, - ) - - # Assume that the heliocentric distance of all point sources in - # the observations are the same as that of the test orbit - ranged_detections_spherical = test_orbit.range_observations( - observations, - propagator=propagator, - max_processes=max_processes, - ) - - # Transform from spherical topocentric to cartesian heliocentric coordinates - ranged_detections_cartesian = transform_coordinates( - ranged_detections_spherical.coordinates, - representation_out=CartesianCoordinates, - frame_out="ecliptic", - origin_out=OriginCodes.SUN, - ) - - transformed_detection_list = [] - if max_processes is None or max_processes > 1: - - if not ray.is_initialized(): - logger.debug( - f"Ray is not initialized. Initializing with {max_processes}..." 
- ) - ray.init(num_cpus=max_processes) - - if isinstance(observations, ray.ObjectRef): - observations_ref = observations - observations = ray.get(observations_ref) - else: - observations_ref = ray.put(observations) - - if isinstance(ephemeris, ray.ObjectRef): - ephemeris_ref = ephemeris - else: - ephemeris_ref = ray.put(ephemeris) - - ranged_detections_cartesian_ref = ray.put(ranged_detections_cartesian) - - # Get state IDs - state_ids = observations.state_id.unique().sort() - futures = [] - for state_id in state_ids: - futures.append( - range_and_transform_remote.remote( - ranged_detections_cartesian_ref, - observations_ref, - ephemeris_ref, - state_id, - ) - ) - - while futures: - finished, futures = ray.wait(futures, num_returns=1) - transformed_detection_list.append(ray.get(finished[0])) - - else: - # Get state IDs - state_ids = observations.state_id.unique().sort() - for state_id in state_ids: - mask = pc.equal(state_id, observations.state_id) - transformed_detection_list.append( - range_and_transform_worker( - ranged_detections_cartesian.apply_mask(mask), - observations.select("state_id", state_id), - ephemeris.select("id", state_id), - state_id, - ) - ) - - transformed_detections = qv.concatenate(transformed_detection_list) - transformed_detections = transformed_detections.sort_by(by=["state_id"]) - - else: - transformed_detections = TransformedDetections.empty() - - time_end = time.perf_counter() - logger.info(f"Transformed {len(transformed_detections)} observations.") - logger.info( - f"Range and transform completed in {time_end - time_start:.3f} seconds." - ) - return transformed_detections - - -def _observations_to_observations_df(observations: Observations) -> pd.DataFrame: - """ - Convert THOR observations (v2.0) to the older format used by the rest of the - pipeline. This will eventually be removed once the rest of the pipeline is - updated to use the new format. - - Parameters - ---------- - observations : `~thor.observations.observations.Observations` - Observations to convert. - - Returns - ------- - observations_df : `~pandas.DataFrame` - Observations in the old format. - """ - observations_df = observations.to_dataframe() - observations_df.rename( - columns={ - "detections.id": "obs_id", - "detections.ra": "RA_deg", - "detections.dec": "Dec_deg", - "detections.ra_sigma": "RA_sigma_deg", - "detections.dec_sigma": "Dec_sigma_deg", - "detections.mag": "mag", - "detections.mag_sigma": "mag_sigma", - }, - inplace=True, - ) - observations_df["mjd_utc"] = ( - observations.detections.time.rescale("utc").mjd().to_numpy(zero_copy_only=False) - ) - return observations_df - - -def _observers_with_states_to_observers_df( - observers: ObserversWithStates, -) -> pd.DataFrame: - """ - Convert THOR observers (v2.0) to the older format used by the rest of the - pipeline. This will eventually be removed once the rest of the pipeline is - updated to use the new format. - - Parameters - ---------- - observers : `~adam_core.observers.observers.Observers` - Observers to convert to a dataframe. - - Returns - ------- - observers_df : `~pandas.DataFrame` - Observers in the old format. 
- """ - observers_df = observers.to_dataframe() - observers_df.rename( - columns={ - "observers.coordinates.x": "obs_x", - "observers.coordinates.y": "obs_y", - "observers.coordinates.z": "obs_z", - "observers.coordinates.vx": "obs_vx", - "observers.coordinates.vy": "obs_vy", - "observers.coordinates.vz": "obs_vz", - }, - inplace=True, - ) - return observers_df - - -def _transformed_detections_to_transformed_detections_df( - transformed_detections: TransformedDetections, -) -> pd.DataFrame: - """ - Convert THOR transformed detections (v2.0) to the older format used by the - rest of the pipeline. This will eventually be removed once the rest of the - pipeline is updated to use the new format. - - Parameters - ---------- - transformed_detections : `~thor.main.TransformedDetections` - Transformed detections to convert to a dataframe. - - Returns - ------- - transformed_detections_df : `~pandas.DataFrame` - Transformed detections in the old format. - """ - transformed_detections_df = transformed_detections.to_dataframe() - transformed_detections_df.rename( - columns={ - "id": "obs_id", - "coordinates.theta_x": "theta_x_deg", - "coordinates.theta_y": "theta_y_deg", - }, - inplace=True, - ) - return transformed_detections_df - - -def link_test_orbit( - test_orbit: TestOrbit, - observations: Observations, - filters: Optional[List[ObservationFilter]] = None, - config: Optional[Config] = None, -) -> Iterator[Any]: - """ - Run THOR for a single test orbit on the given observations. This function will yield - results at each stage of the pipeline. - 1. transformed_detections - 2. clusters, cluster_members - 3. iod_orbits, iod_orbit_members - 4. od_orbits, od_orbit_members - 5. recovered_orbits, recovered_orbit_members - - Parameters - ---------- - test_orbit : `~thor.orbit.TestOrbit` - Test orbit to use to gather and transform observations. - observations : `~thor.observations.observations.Observations` - Observations from which range and transform the detections. - filters : list of `~thor.observations.filters.ObservationFilter`, optional - List of filters to apply to the observations before running THOR. - propagator : `~adam_core.propagator.propagator.Propagator` - Propagator to use to propagate the test orbit and generate - ephemerides. - max_processes : int, optional - Maximum number of processes to use for parallelization. - """ - time_start = time.perf_counter() - logger.info("Running test orbit...") - - if config is None: - config = Config() - - if config.propagator == "PYOORB": - propagator = PYOORB() - else: - raise ValueError(f"Unknown propagator: {config.propagator}") - - use_ray = False - if config.max_processes is None or config.max_processes > 1: - # Initialize ray - if not ray.is_initialized(): - logger.debug( - f"Ray is not initialized. Initializing with {config.max_processes}..." 
- ) - ray.init(num_cpus=config.max_processes) - - if not isinstance(observations, ray.ObjectRef): - observations = ray.put(observations) - - use_ray = True - - # Apply filters to the observations - filtered_observations = observations - if filters is None: - # By default we always filter by radius from the predicted position of the test orbit - filters = [TestOrbitRadiusObservationFilter(radius=config.cell_radius)] - for filter_i in filters: - filtered_observations = filter_i.apply( - filtered_observations, test_orbit, max_processes=config.max_processes - ) - - # Defragment the observations - if len(filtered_observations) > 0: - filtered_observations = qv.defragment(filtered_observations) - - # Observations are no longer needed, so we can delete them - del observations - - if use_ray: - filtered_observations = ray.put(filtered_observations) - - # Range and transform the observations - transformed_detections = range_and_transform( - test_orbit, - filtered_observations, - propagator=propagator, - max_processes=config.max_processes, - ) - yield transformed_detections - - # TODO: ray support for the rest of the pipeline has not yet been implemented - # so we will convert the ray objects to regular objects for now - if use_ray: - filtered_observations = ray.get(filtered_observations) - - if len(filtered_observations) > 0: - # Convert quivr tables to dataframes used by the rest of the pipeline - observations_df = _observations_to_observations_df(filtered_observations) - observers_df = _observers_with_states_to_observers_df( - filtered_observations.get_observers() - ) - transformed_detections_df = ( - _transformed_detections_to_transformed_detections_df(transformed_detections) - ) - - # Merge dataframes together - observations_df = observations_df.merge(observers_df, on="state_id") - transformed_detections_df = transformed_detections_df.merge( - observations_df[["obs_id", "mjd_utc", "observatory_code"]], on="obs_id" - ) - else: - transformed_detections_df = pd.DataFrame() - observations_df = pd.DataFrame() - - # Run clustering - clusters, cluster_members = clusterAndLink( - transformed_detections_df, - vx_range=[config.vx_min, config.vx_max], - vy_range=[config.vy_min, config.vy_max], - vx_bins=config.vx_bins, - vy_bins=config.vy_bins, - eps=config.cluster_radius, - min_obs=config.cluster_min_obs, - min_arc_length=config.cluster_min_arc_length, - alg=config.cluster_algorithm, - num_jobs=config.max_processes, - parallel_backend=config.parallel_backend, - ) - yield clusters, cluster_members - - # Run initial orbit determination - iod_orbits, iod_orbit_members = initialOrbitDetermination( - observations_df, - cluster_members, - min_obs=config.iod_min_obs, - min_arc_length=config.iod_min_arc_length, - contamination_percentage=config.iod_contamination_percentage, - rchi2_threshold=config.iod_rchi2_threshold, - observation_selection_method=config.iod_observation_selection_method, - propagator=config.propagator, - propagator_kwargs={}, - chunk_size=config.iod_chunk_size, - num_jobs=config.max_processes, - parallel_backend=config.parallel_backend, - # TODO: investigate whether these should be configurable - iterate=False, - identify_subsets=False, - light_time=True, - linkage_id_col="cluster_id", - ) - yield iod_orbits, iod_orbit_members - - # Run differential correction - od_orbits, od_orbit_members = differentialCorrection( - iod_orbits, - iod_orbit_members, - observations_df, - min_obs=config.od_min_obs, - min_arc_length=config.od_min_arc_length, - 
contamination_percentage=config.od_contamination_percentage, - rchi2_threshold=config.od_rchi2_threshold, - delta=config.od_delta, - max_iter=config.od_max_iter, - propagator=config.propagator, - propagator_kwargs={}, - chunk_size=config.od_chunk_size, - num_jobs=config.max_processes, - parallel_backend=config.parallel_backend, - # TODO: investigate whether these should be configurable - method="central", - fit_epoch=False, - ) - yield od_orbits, od_orbit_members - - # Run arc extension - recovered_orbits, recovered_orbit_members = mergeAndExtendOrbits( - od_orbits, - od_orbit_members, - observations_df, - min_obs=config.arc_extension_min_obs, - min_arc_length=config.arc_extension_min_arc_length, - contamination_percentage=config.arc_extension_contamination_percentage, - rchi2_threshold=config.arc_extension_rchi2_threshold, - eps=config.arc_extension_radius, - delta=config.od_delta, - max_iter=config.od_max_iter, - propagator=config.propagator, - propagator_kwargs={}, - orbits_chunk_size=config.arc_extension_chunk_size, - num_jobs=config.max_processes, - parallel_backend=config.parallel_backend, - # TODO: investigate whether these should be configurable - method="central", - fit_epoch=False, - observations_chunk_size=100000, - ) - yield recovered_orbits, recovered_orbit_members - - time_end = time.perf_counter() - logger.info(f"Test orbit completed in {time_end-time_start:.3f} seconds.") diff --git a/thor/observations/observations.py b/thor/observations/observations.py index 0c820a79..8af588f1 100644 --- a/thor/observations/observations.py +++ b/thor/observations/observations.py @@ -4,6 +4,7 @@ import pyarrow as pa import pyarrow.compute as pc import quivr as qv +from adam_core.coordinates import CoordinateCovariances, Origin, SphericalCoordinates from adam_core.observations import Exposures, PointSourceDetections from adam_core.observers import Observers from adam_core.time import Timestamp @@ -193,6 +194,28 @@ def get_observers(self) -> ObserversWithStates: observers = qv.concatenate(observers_with_states) return observers.sort_by("state_id") + def to_spherical_coordinates(self) -> SphericalCoordinates: + """ + Convert the observations to spherical coordinates which can be used + to calculate residuals. + + Returns + ------- + coordinates : `~adam_core.coordinates.spherical.SphericalCoordinates` + The detections represented as spherical coordinates. + """ + sigmas = np.zeros((len(self), 6)) + sigmas[:, 1] = self.detections.ra_sigma.to_numpy(zero_copy_only=False) + sigmas[:, 2] = self.detections.dec_sigma.to_numpy(zero_copy_only=False) + return SphericalCoordinates.from_kwargs( + lon=self.detections.ra, + lat=self.detections.dec, + time=self.detections.time, + covariance=CoordinateCovariances.from_sigmas(sigmas), + origin=Origin.from_kwargs(code=self.observatory_code), + frame="equatorial", + ) + def select_exposure(self, exposure_id: int) -> "Observations": """ Select observations from a single exposure. 
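A minimal usage sketch of the new Observations.to_spherical_coordinates() method added above. It is only illustrative: `observations` is assumed to be an Observations table and `ephemeris` an ephemeris table row-aligned with those detections (both names are placeholders, not defined in this patch). The method packages the detections' RA/Dec and their sigmas into an adam_core SphericalCoordinates table, which is how attribution_worker and iod compute residuals later in this patch.

from adam_core.coordinates.residuals import Residuals

# Detections as spherical coordinates with per-detection RA/Dec sigmas
coords = observations.to_spherical_coordinates()

# Residuals and chi2 against the predicted coordinates (assumed row-aligned)
residuals = Residuals.calculate(coords, ephemeris.coordinates)
chi2 = residuals.chi2.to_numpy()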
diff --git a/thor/orbit_determination/__init__.py b/thor/orbit_determination/__init__.py new file mode 100644 index 00000000..0e9f032d --- /dev/null +++ b/thor/orbit_determination/__init__.py @@ -0,0 +1 @@ +from .fitted_orbits import * diff --git a/thor/orbit_determination/fitted_orbits.py b/thor/orbit_determination/fitted_orbits.py new file mode 100644 index 00000000..2ab94093 --- /dev/null +++ b/thor/orbit_determination/fitted_orbits.py @@ -0,0 +1,176 @@ +import uuid +from typing import List, Literal, Optional, Tuple + +import numpy as np +import pyarrow as pa +import pyarrow.compute as pc +import quivr as qv +from adam_core.coordinates import CartesianCoordinates +from adam_core.coordinates.residuals import Residuals +from adam_core.orbits import Orbits + +from ..utils.quivr import drop_duplicates + +__all__ = [ + "FittedOrbits", + "FittedOrbitMembers", +] + + +class FittedOrbits(qv.Table): + + orbit_id = qv.StringColumn(default=lambda: uuid.uuid4().hex) + object_id = qv.StringColumn(nullable=True) + coordinates = CartesianCoordinates.as_column() + arc_length = qv.Float64Column() + num_obs = qv.Int64Column() + chi2 = qv.Float64Column() + reduced_chi2 = qv.Float64Column() + improved = qv.BooleanColumn(nullable=True) + + def to_orbits(self) -> Orbits: + """ + Convert fitted orbits to orbits that can be used by + a Propagator. + + Returns + ------- + orbits : `~adam_core.orbits.Orbits` + Orbits. + """ + return Orbits.from_kwargs( + orbit_id=self.orbit_id, + object_id=self.object_id, + coordinates=self.coordinates, + ) + + def drop_duplicates( + self, + orbit_members: "FittedOrbitMembers", + subset: Optional[List[str]] = None, + keep: Literal["first", "last"] = "first", + ) -> Tuple["FittedOrbits", "FittedOrbitMembers"]: + """ + Drop duplicate orbits from the fitted orbits and remove + the corresponding orbit members. + + Parameters + ---------- + orbit_members : `~thor.orbit_determination.FittedOrbitMembers` + Fitted orbit members. + subset : list of str, optional + Subset of columns to consider when dropping duplicates. If not specified all the columns + specifying unique state are used: time, x, y, z, vx, vy, vz. + keep : {'first', 'last'}, default 'first' + If there are duplicate rows then keep the first or last row. + + Returns + ------- + filtered : `~thor.orbit_determination.FittedOrbits` + Fitted orbits without duplicates. + filtered_orbit_members : `~thor.orbit_determination.FittedOrbitMembers` + Fitted orbit members without duplicates. + """ + if subset is None: + subset = [ + "coordinates.time.days", + "coordinates.time.nanos", + "coordinates.x", + "coordinates.y", + "coordinates.z", + "coordinates.vx", + "coordinates.vy", + "coordinates.vz", + ] + + filtered = drop_duplicates(self, subset=subset, keep=keep) + filtered_orbit_members = orbit_members.apply_mask( + pc.is_in(orbit_members.orbit_id, filtered.orbit_id) + ) + return filtered, filtered_orbit_members + + def assign_duplicate_observations( + self, orbit_members: "FittedOrbitMembers" + ) -> Tuple["FittedOrbits", "FittedOrbitMembers"]: + """ + Assigns observations that have been assigned to multiple orbits to the orbit with t + he most observations, longest arc length, and lowest reduced chi2. + + Parameters + ---------- + orbit_members : `~thor.orbit_determination.FittedOrbitMembers` + Fitted orbit members. + + Returns + ------- + filtered : `~thor.orbit_determination.FittedOrbits` + Fitted orbits with duplicate assignments removed. 
+        filtered_orbit_members : `~thor.orbit_determination.FittedOrbitMembers`
+            Fitted orbit members with duplicate assignments removed.
+        """
+        # Sort by number of observations, arc length, and reduced chi2
+        # Here we assume that orbits with more observations, longer arc lengths, and lower reduced chi2
+        # are better candidates for claiming detections that have been assigned to multiple orbits
+        sorted = self.sort_by(
+            [
+                ("num_obs", "descending"),
+                ("arc_length", "descending"),
+                ("reduced_chi2", "ascending"),
+            ]
+        )
+
+        # Extract the orbit IDs from the sorted table
+        orbit_ids = sorted.orbit_id.unique()
+
+        # Calculate the order in which these orbit IDs appear in the orbit_members table
+        order_in_orbits = pc.index_in(orbit_members.orbit_id, orbit_ids)
+
+        # Create an index into the orbit_members table and append the order_in_orbits column
+        orbit_members_table = (
+            orbit_members.flattened_table()
+            .append_column("index", pa.array(np.arange(len(orbit_members))))
+            .append_column("order_in_orbits", order_in_orbits)
+        )
+
+        # Drop the residual values (a list column) due to: https://github.com/apache/arrow/issues/32504
+        orbit_members_table = orbit_members_table.drop_columns(["residuals.values"])
+
+        # Sort orbit members by the orbit IDs (in the same order as the sorted orbits table)
+        orbit_members_table = orbit_members_table.sort_by(
+            [("order_in_orbits", "ascending")]
+        )
+
+        # Now group by the observation ID and aggregate the index column to get the first index
+        # for each observation ID, i.e. the member row belonging to the best-ranked orbit
+        indices = (
+            orbit_members_table.group_by("obs_id", use_threads=False)
+            .aggregate([("index", "first")])
+            .column("index_first")
+        )
+
+        # Use the indices to filter the orbit_members table and then use the resulting orbit IDs to filter the orbits table
+        filtered_orbit_members = orbit_members.take(indices)
+        filtered = self.apply_mask(
+            pc.is_in(self.orbit_id, filtered_orbit_members.orbit_id)
+        )
+
+        return filtered, filtered_orbit_members
+
+
+class FittedOrbitMembers(qv.Table):
+
+    orbit_id = qv.StringColumn()
+    obs_id = qv.StringColumn()
+    residuals = Residuals.as_column(nullable=True)
+    solution = qv.BooleanColumn(nullable=True)
+    outlier = qv.BooleanColumn(nullable=True)
+
+    def drop_outliers(self) -> "FittedOrbitMembers":
+        """
+        Drop outliers from the fitted orbit members.
+
+        Returns
+        -------
+        fitted_orbit_members : `~thor.orbit_determination.FittedOrbitMembers`
+            Fitted orbit members without outliers.
+ """ + return self.apply_mask(pc.equal(self.outlier, False)) diff --git a/thor/orbit_selection.py b/thor/orbit_selection.py index 10cd17cf..96fd76f2 100644 --- a/thor/orbit_selection.py +++ b/thor/orbit_selection.py @@ -5,8 +5,6 @@ from adam_core.orbits import Orbits from astropy.time import Time -from .utils import assignPatchesHEALPix, assignPatchesSquare - __all__ = [ "findAverageOrbits", "findTestOrbitsPatch", diff --git a/thor/orbits/__init__.py b/thor/orbits/__init__.py index 42552650..a97a21e9 100644 --- a/thor/orbits/__init__.py +++ b/thor/orbits/__init__.py @@ -3,7 +3,6 @@ from .gauss import * from .gibbs import * from .herrick_gibbs import * -from .residuals import * from .attribution import * from .iod import * from .od import * diff --git a/thor/orbits/attribution.py b/thor/orbits/attribution.py index 7a283277..f1684898 100644 --- a/thor/orbits/attribution.py +++ b/thor/orbits/attribution.py @@ -1,92 +1,157 @@ -import os -from typing import Literal - -os.environ["OMP_NUM_THREADS"] = "1" -os.environ["OPENBLAS_NUM_THREADS"] = "1" -os.environ["MKL_NUM_THREADS"] = "1" -os.environ["VECLIB_MAXIMUM_THREADS"] = "1" -os.environ["NUMEXPR_NUM_THREADS"] = "1" - -import concurrent.futures as cf import logging -import multiprocessing as mp import time -from functools import partial +from typing import Literal, Optional, Tuple import numpy as np -import pandas as pd +import numpy.typing as npt +import pyarrow as pa +import pyarrow.compute as pc import quivr as qv -from adam_core.observers import Observers +import ray +from adam_core.coordinates.residuals import Residuals from adam_core.orbits import Orbits from adam_core.propagator import PYOORB from adam_core.propagator.utils import _iterate_chunks -from adam_core.time import Timestamp from sklearn.neighbors import BallTree -from ..utils import ( - _checkParallel, - _initWorker, - calcChunkSize, - removeDuplicateObservations, - sortLinkages, - yieldChunks, -) -from .od import differentialCorrection -from .residuals import calcResiduals -from .utils import _ephemeris_to_dataframe +from ..observations.observations import Observations +from ..orbit_determination import FittedOrbitMembers, FittedOrbits +from .od import differential_correction logger = logging.getLogger(__name__) -__all__ = ["attribution_worker", "attributeObservations", "mergeAndExtendOrbits"] +__all__ = ["Attributions", "attribute_observations", "merge_and_extend_orbits"] + + +LATLOT_INDEX = np.array([2, 1]) + + +class Attributions(qv.Table): + orbit_id = qv.StringColumn() + obs_id = qv.StringColumn() + residuals = Residuals.as_column(nullable=True) + distance = qv.Float64Column(nullable=True) + + def drop_coincident_attributions( + self, observations: Observations + ) -> "Attributions": + """ + Drop attributions that are coincident in time: two or more observations attributed + to the same orbit that occur at the same time. The observation with the lowest + distance is kept. + + Parameters + ---------- + observations : `~thor.observations.observations.Observations` + Observations which will be used to get the observation times. + + Returns + ------- + attributions : `~thor.orbits.attribution.Attributions` + Attributions table with coincident attributions removed. 
+ """ + # Flatten the table so nested columns are dot-delimited at the top level + flattened_table = self.flattened_table() + + # Drop the residual values (a list column) due to: https://github.com/apache/arrow/issues/32504 + flattened_table = flattened_table.drop(["residuals.values"]) + + # Filter the observations to only include those that have been attributed + # to an orbit + observations_filtered = observations.apply_mask( + pc.is_in(observations.detections.id, flattened_table.column("obs_id")) + ) + + # Flatten the observations table + flattened_observations = observations_filtered.flattened_table() + + # Only keep relevant columns + flattened_observations = flattened_observations.select( + ["detections.id", "detections.time.days", "detections.time.nanos"] + ) + + # Join the time column back to the flattened attributions table + flattened_table = flattened_table.join( + flattened_observations, ["obs_id"], right_keys=["detections.id"] + ) + + # Add index column + flattened_table = flattened_table.add_column( + 0, "index", pa.array(np.arange(len(flattened_table))) + ) + + # Sort the table + flattened_table = flattened_table.sort_by( + [ + ("orbit_id", "ascending"), + ("detections.time.days", "ascending"), + ("detections.time.nanos", "ascending"), + ] + ) + + # Group by orbit ID and observation time + indices = ( + flattened_table.group_by( + ["orbit_id", "detections.time.days", "detections.time.nanos"], + use_threads=False, + ) + .aggregate([("index", "first")]) + .column("index_first") + ) + + return self.take(indices) def attribution_worker( - orbits, - observations, - eps=1 / 3600, - include_probabilistic=True, + orbit_ids: npt.NDArray[np.str_], + observation_indices: npt.NDArray[np.int64], + orbits: FittedOrbits, + observations: Observations, + radius: float = 1 / 3600, propagator: Literal["PYOORB"] = "PYOORB", propagator_kwargs: dict = {}, -): +) -> Attributions: if propagator == "PYOORB": prop = PYOORB(**propagator_kwargs) else: raise ValueError(f"Invalid propagator '{propagator}'.") - # Create Observers table - observers_list = [] - for observatory_code in observations["observatory_code"].unique(): - observers_list.append( - Observers.from_code( - observatory_code, - Timestamp.from_mjd( - observations[ - observations["observatory_code"].isin([observatory_code]) - ]["mjd_utc"].unique(), - scale="utc", - ), - ) - ) + # Select the orbits and observations for this batch + observations = observations.take(observation_indices) + orbits = orbits.apply_mask(pc.equal(orbits.orbit_id, orbit_ids)) + + # Get the unique observers for this batch of observations + observers_with_states = observations.get_observers() + observers = observers_with_states.observers - observers = qv.concatenate(observers_list) - observers = observers.sort_by( - ["coordinates.time.days", "coordinates.time.nanos", "code"] + # Generate ephemerides for each orbit at the observation times + ephemeris = prop.generate_ephemeris( + orbits, observers, chunk_size=len(orbits), max_processes=1 ) - # Genereate ephemerides for each orbit at the observation times - ephemeris = prop._generate_ephemeris(orbits, observers) - ephemeris = _ephemeris_to_dataframe(ephemeris) - # TODO: Replace types below with pyarrow/quivr tables - - # Group the predicted ephemerides and observations by visit / exposure - ephemeris_grouped = ephemeris.groupby(by=["observatory_code", "mjd_utc"]) - ephemeris_visits = [ - ephemeris_grouped.get_group(g) for g in ephemeris_grouped.groups - ] - observations_grouped = 
observations.groupby(by=["observatory_code", "mjd_utc"]) - observations_visits = [ - observations_grouped.get_group(g) for g in observations_grouped.groups - ] + # Round the ephemeris and observations to the nearest millisecond + ephemeris = ephemeris.set_column( + "coordinates.time", ephemeris.coordinates.time.rounded(precision="ms") + ) + observations_rounded = observations.set_column( + "detections.time", observations.detections.time.rounded(precision="ms") + ) + + # Create a linkage between the ephemeris and observations + linkage = qv.MultiKeyLinkage( + ephemeris, + observations_rounded, + left_keys={ + "days": ephemeris.coordinates.time.days, + "nanos": ephemeris.coordinates.time.nanos, + "code": ephemeris.coordinates.origin.code, + }, + right_keys={ + "days": observations_rounded.detections.time.days, + "nanos": observations_rounded.detections.time.nanos, + "code": observations_rounded.observatory_code, + }, + ) # Loop through each unique exposure and visit, find the nearest observations within # eps (haversine metric) @@ -94,33 +159,23 @@ def attribution_worker( orbit_ids_associated = [] obs_ids_associated = [] obs_times_associated = [] - eps_rad = np.radians(eps) + radius_rad = np.radians(radius) residuals = [] - stats = [] - for ephemeris_visit, observations_visit in zip( - ephemeris_visits, observations_visits - ): - - assert len(ephemeris_visit["mjd_utc"].unique()) == 1 - assert len(observations_visit["mjd_utc"].unique()) == 1 - assert ( - observations_visit["mjd_utc"].unique()[0] - == ephemeris_visit["mjd_utc"].unique()[0] - ) + for _, ephemeris_i, observations_i in linkage.iterate(): + + # Extract the observation IDs and times + obs_ids = observations_i.detections.id.to_numpy(zero_copy_only=False) + obs_times = observations_i.detections.time.mjd().to_numpy(zero_copy_only=False) + orbit_ids = ephemeris_i.orbit_id.to_numpy(zero_copy_only=False) - obs_ids = observations_visit[["obs_id"]].values - obs_times = observations_visit[["mjd_utc"]].values - orbit_ids = ephemeris_visit[["orbit_id"]].values - coords = observations_visit[["RA_deg", "Dec_deg"]].values - coords_latlon = observations_visit[["Dec_deg"]].values - coords_predicted = ephemeris_visit[["RA_deg", "Dec_deg"]].values - coords_sigma = observations_visit[["RA_sigma_deg", "Dec_sigma_deg"]].values + # Extract the spherical coordinates for both the observations + # and ephemeris + coords = observations_i.to_spherical_coordinates() + coords_predicted = ephemeris_i.coordinates # Haversine metric requires latitude first then longitude... 
- coords_latlon = np.radians(observations_visit[["Dec_deg", "RA_deg"]].values) - coords_predicted_latlon = np.radians( - ephemeris_visit[["Dec_deg", "RA_deg"]].values - ) + coords_latlon = np.radians(coords.values[:, LATLOT_INDEX]) + coords_predicted_latlon = np.radians(coords_predicted.values[:, LATLOT_INDEX]) num_obs = len(coords_predicted) k = np.minimum(3, num_obs) @@ -139,210 +194,112 @@ def attribution_worker( # Select all observations with distance smaller or equal # to the maximum given distance - mask = np.where(d <= eps_rad) + mask = np.where(d <= radius_rad) if len(d[mask]) > 0: orbit_ids_associated.append(orbit_ids[i[mask]]) obs_ids_associated.append(obs_ids[mask[0]]) obs_times_associated.append(obs_times[mask[0]]) - distances.append(d[mask].reshape(-1, 1)) + distances.append(d[mask]) - residuals_visit, stats_visit = calcResiduals( - coords[mask[0]], - coords_predicted[i[mask]], - sigmas_actual=coords_sigma[mask[0]], - include_probabilistic=True, + residuals_i = Residuals.calculate( + coords.take(mask[0]), coords_predicted.take(i[mask]) ) - residuals.append(residuals_visit) - stats.append(np.vstack(stats_visit).T) + residuals.append(residuals_i) if len(distances) > 0: - distances = np.degrees(np.vstack(distances)) - orbit_ids_associated = np.vstack(orbit_ids_associated) - obs_ids_associated = np.vstack(obs_ids_associated) - obs_times_associated = np.vstack(obs_times_associated) - residuals = np.vstack(residuals) - stats = np.vstack(stats) - - attributions = { - "orbit_id": orbit_ids_associated[:, 0], - "obs_id": obs_ids_associated[:, 0], - "mjd_utc": obs_times_associated[:, 0], - "distance": distances[:, 0], - "residual_ra_arcsec": residuals[:, 0] * 3600, - "residual_dec_arcsec": residuals[:, 1] * 3600, - "chi2": stats[:, 0], - } - if include_probabilistic: - attributions["probability"] = stats[:, 1] - attributions["mahalanobis_distance"] = stats[:, 2] - - attributions = pd.DataFrame(attributions) + distances = np.degrees(np.concatenate(distances)) + orbit_ids_associated = np.concatenate(orbit_ids_associated) + obs_ids_associated = np.concatenate(obs_ids_associated) + obs_times_associated = np.concatenate(obs_times_associated) + residuals = qv.concatenate(residuals) + + return Attributions.from_kwargs( + orbit_id=orbit_ids_associated, + obs_id=obs_ids_associated, + residuals=residuals, + distance=distances, + ) else: - columns = [ - "orbit_id", - "obs_id", - "mjd_utc", - "distance", - "residual_ra_arcsec", - "residual_dec_arcsec", - "chi2", - ] - if include_probabilistic: - columns += ["probability", "mahalanobis_distance"] - - attributions = pd.DataFrame(columns=columns) + return Attributions.empty() - return attributions + +attribution_worker_remote = ray.remote(attribution_worker) +attribution_worker_remote.options( + num_returns=1, + num_cpus=1, +) -def attributeObservations( - orbits, - observations, - eps=5 / 3600, - include_probabilistic=True, +def attribute_observations( + orbits: Orbits, + observations: Observations, + radius: float = 5 / 3600, propagator: Literal["PYOORB"] = "PYOORB", propagator_kwargs: dict = {}, - orbits_chunk_size=10, - observations_chunk_size=100000, - num_jobs=1, - parallel_backend="cf", -): + orbits_chunk_size: int = 10, + observations_chunk_size: int = 100000, + max_processes: Optional[int] = 1, +) -> Attributions: logger.info("Running observation attribution...") time_start = time.time() - num_orbits = len(orbits) - - attribution_dfs = [] - - parallel, num_workers = _checkParallel(num_jobs, parallel_backend) - if num_workers > 1: - - if 
parallel_backend == "ray": - import ray - - if not ray.is_initialized(): - ray.init(address="auto") - - attribution_worker_ray = ray.remote(attribution_worker) - attribution_worker_ray = attribution_worker_ray.options( - num_returns=1, num_cpus=1 - ) - - # Send up to orbits_chunk_size orbits to each OD worker for processing - chunk_size_ = calcChunkSize( - num_orbits, num_workers, orbits_chunk_size, min_chunk_size=1 - ) - orbits_split = [chunk for chunk in _iterate_chunks(orbits, chunk_size_)] - - obs_oids = [] - for observations_c in yieldChunks(observations, observations_chunk_size): - obs_oids.append(ray.put(observations_c)) - - p = [] - for obs_oid in obs_oids: - for orbit_i in orbits_split: - p.append( - attribution_worker_ray.remote( - orbit_i, - obs_oid, - eps=eps, - include_probabilistic=include_probabilistic, - propagator=propagator, - propagator_kwargs=propagator_kwargs, - ) - ) - - attribution_dfs_i = ray.get(p) - attribution_dfs += attribution_dfs_i - - elif parallel_backend == "mp": - p = mp.Pool( - processes=num_workers, - initializer=_initWorker, - ) - - # Send up to orbits_chunk_size orbits to each OD worker for processing - chunk_size_ = calcChunkSize( - num_orbits, num_workers, orbits_chunk_size, min_chunk_size=1 - ) - orbits_split = [chunk for chunk in _iterate_chunks(orbits, chunk_size_)] - - for observations_c in yieldChunks(observations, observations_chunk_size): - - obs = [observations_c for i in range(len(orbits_split))] - attribution_dfs_i = p.starmap( - partial( - attribution_worker, - eps=eps, - include_probabilistic=include_probabilistic, + orbit_ids = orbits.orbit_id + observation_indices = np.arange(0, len(observations)) + + attributions_list = [] + if max_processes is None or max_processes > 1: + + if not ray.is_initialized(): + ray.init(address="auto") + + observations_ref = ray.put(observations) + orbits_ref = ray.put(orbits) + + futures = [] + for orbit_id_chunk in _iterate_chunks(orbit_ids, orbits_chunk_size): + for observations_indices_chunk in _iterate_chunks( + observation_indices, observations_chunk_size + ): + futures.append( + attribution_worker_remote.remote( + orbit_id_chunk, + observations_indices_chunk, + orbits_ref, + observations_ref, + radius=radius, propagator=propagator, propagator_kwargs=propagator_kwargs, - ), - zip( - orbits_split, - obs, - ), - ) - attribution_dfs += attribution_dfs_i - - p.close() - - elif parallel_backend == "cf": - with cf.ProcessPoolExecutor( - max_workers=num_workers, initializer=_initWorker - ) as executor: - futures = [] - for observations_c in yieldChunks( - observations, observations_chunk_size - ): - for orbit_c in _iterate_chunks(orbits, orbits_chunk_size): - futures.append( - executor.submit( - attribution_worker, - orbit_c, - observations_c, - eps=eps, - include_probabilistic=include_probabilistic, - propagator=propagator, - propagator_kwargs=propagator_kwargs, - ) - ) - attribution_dfs = [] - for future in cf.as_completed(futures): - attribution_dfs.append(future.result()) - - else: - raise ValueError( - "Invalid parallel_backend '{}'. 
Must be one of 'ray', 'mp', or 'cf'.".format( - parallel_backend + ) ) - ) + + while futures: + finished, futures = ray.wait(futures, num_returns=1) + attributions_list.append(ray.get(finished[0])) else: - for observations_c in yieldChunks(observations, observations_chunk_size): - for orbit_c in _iterate_chunks(orbits, orbits_chunk_size): + for orbit_id_chunk in _iterate_chunks(orbit_ids, orbits_chunk_size): + for observations_indices_chunk in _iterate_chunks( + observation_indices, observations_chunk_size + ): attribution_df_i = attribution_worker( - orbit_c, - observations_c, - eps=eps, - include_probabilistic=include_probabilistic, + orbit_id_chunk, + observations_indices_chunk, + orbits, + observations, + radius=radius, propagator=propagator, propagator_kwargs=propagator_kwargs, ) - attribution_dfs.append(attribution_df_i) + attributions_list.append(attribution_df_i) - attributions = pd.concat(attribution_dfs) - attributions.sort_values( - by=["orbit_id", "mjd_utc", "distance"], inplace=True, ignore_index=True - ) + attributions = qv.concatenate(attributions_list) + attributions = attributions.sort_by(["orbit_id", "obs_id", "distance"]) time_end = time.time() logger.info( - "Attributed {} observations to {} orbits.".format( - attributions["obs_id"].nunique(), attributions["orbit_id"].nunique() - ) + f"Attributed {len(attributions.obs_id.unique())} observations to {len(attributions.orbit_id.unique())} orbits." ) logger.info( "Attribution completed in {:.3f} seconds.".format(time_end - time_start) @@ -350,26 +307,25 @@ def attributeObservations( return attributions -def mergeAndExtendOrbits( - orbits, - orbit_members, - observations, - min_obs=6, - min_arc_length=1.0, - contamination_percentage=20.0, - rchi2_threshold=5, - eps=1 / 3600, - delta=1e-8, - max_iter=20, - method="central", - fit_epoch=False, +def merge_and_extend_orbits( + orbits: FittedOrbits, + orbit_members: FittedOrbitMembers, + observations: Observations, + min_obs: int = 6, + min_arc_length: float = 1.0, + contamination_percentage: float = 20.0, + rchi2_threshold: float = 5, + radius: float = 1 / 3600, + delta: float = 1e-8, + max_iter: int = 20, + method: Literal["central", "finite"] = "central", + fit_epoch: bool = False, propagator: Literal["PYOORB"] = "PYOORB", propagator_kwargs: dict = {}, - orbits_chunk_size=10, - observations_chunk_size=100000, - num_jobs=60, - parallel_backend="cf", -): + orbits_chunk_size: int = 10, + observations_chunk_size: int = 100000, + max_processes: Optional[int] = 1, +) -> Tuple[FittedOrbits, FittedOrbitMembers]: """ Attempt to extend an orbit's observational arc by running attribution on the observations. 
This is an iterative process: attribution @@ -392,76 +348,47 @@ def mergeAndExtendOrbits( time_start = time.time() logger.info("Running orbit extension and merging...") - if len(observations) > 0: - orbits_iter, orbit_members_iter = sortLinkages( - orbits, orbit_members, observations, linkage_id_col="orbit_id" - ) - - else: - orbits_iter = orbits.copy() - orbit_members_iter = orbit_members.copy() + # Set the running variables + orbits_iter = orbits + orbit_members_iter = orbit_members + observations_iter = observations iterations = 0 - odp_orbits_dfs = [] - odp_orbit_members_dfs = [] - observations_iter = observations.copy() + odp_orbits_list = [] + odp_orbit_members_list = [] if len(orbits_iter) > 0 and len(observations_iter) > 0: - converged = False + converged = False while not converged: # Run attribution - attributions = attributeObservations( - Orbits.from_flat_dataframe(orbits_iter), + attributions = attribute_observations( + orbits_iter.to_orbits(), observations_iter, - eps=eps, - include_probabilistic=True, + radius=radius, propagator=propagator, propagator_kwargs=propagator_kwargs, orbits_chunk_size=orbits_chunk_size, observations_chunk_size=observations_chunk_size, - num_jobs=num_jobs, - parallel_backend=parallel_backend, + max_processes=max_processes, ) - assert np.all( - np.isin( - orbit_members_iter["obs_id"].unique(), - observations_iter["obs_id"].unique(), - ) - ) + # For orbits with coincident observations: multiple observations attributed at + # the same time, keep only observation with smallest distance + attributions = attributions.drop_coincident_attributions(observations) - # Attributions are sorted by orbit ID, observation time and - # angular distance. Keep only the one observation with smallest distance - # for any orbits that have multiple observations attributed at the same observation time. 
- attributions.drop_duplicates( - subset=["orbit_id", "mjd_utc"], - keep="first", - inplace=True, - ignore_index=True, + # Create a new orbit members table with the newly attributed observations and + # filter the orbits to only include those that still have observations + orbit_members_iter = FittedOrbitMembers.from_kwargs( + orbit_id=attributions.orbit_id, + obs_id=attributions.obs_id, + residuals=attributions.residuals, ) - orbit_members_iter = attributions[ - [ - "orbit_id", - "obs_id", - "residual_ra_arcsec", - "residual_dec_arcsec", - "chi2", - ] - ] - orbits_iter = orbits_iter[ - orbits_iter["orbit_id"].isin(orbit_members_iter["orbit_id"].unique()) - ] - - orbits_iter, orbit_members_iter = sortLinkages( - orbits_iter, - orbit_members_iter[["orbit_id", "obs_id"]], - observations_iter, + orbits_iter = orbits_iter.apply_mask( + pc.is_in(orbits_iter.orbit_id, orbit_members_iter.orbit_id.unique()) ) - # Run differential orbit correction on all orbits - # with the newly added observations to the orbits - # that had observations attributed to them - orbits_iter, orbit_members_iter = differentialCorrection( + # Run differential orbit correction + orbits_iter, orbit_members_iter = differential_correction( orbits_iter, orbit_members_iter, observations_iter, @@ -472,118 +399,123 @@ def mergeAndExtendOrbits( delta=delta, method=method, max_iter=max_iter, - fit_epoch=False, + fit_epoch=fit_epoch, propagator=propagator, propagator_kwargs=propagator_kwargs, chunk_size=orbits_chunk_size, - num_jobs=num_jobs, - parallel_backend=parallel_backend, + max_processes=max_processes, + ) + orbit_members_iter = orbit_members_iter.drop_outliers() + + # Remove any duplicate orbits + orbits_iter = orbits_iter.sort_by([("reduced_chi2", "ascending")]) + orbits_iter, orbit_members_iter = orbits_iter.drop_duplicates( + orbit_members_iter, + subset=[ + "coordinates.time.days", + "coordinates.time.nanos", + "coordinates.x", + "coordinates.y", + "coordinates.z", + "coordinates.vx", + "coordinates.vy", + "coordinates.vz", + ], + keep="first", ) - orbit_members_iter = orbit_members_iter[orbit_members_iter["outlier"] == 0] - orbit_members_iter.reset_index(inplace=True, drop=True) # Remove the orbits that were not improved from the pool of available orbits. Orbits that were not improved # are orbits that have already iterated to their best-fit solution given the observations available. These orbits # are unlikely to recover more observations in subsequent iterations and so can be saved for output. - not_improved = orbits_iter[orbits_iter["improved"] == False][ - "orbit_id" - ].values - orbits_out = orbits_iter[orbits_iter["orbit_id"].isin(not_improved)].copy() - orbit_members_out = orbit_members_iter[ - orbit_members_iter["orbit_id"].isin(not_improved) - ].copy() - not_improved_obs_ids = orbit_members_out["obs_id"].values - - # If some orbits that were not improved still share observations, keep the orbit with the lowest - # reduced chi2 in the pool of orbits but delete the others. 
- obs_id_occurences = orbit_members_out["obs_id"].value_counts() - duplicate_obs_ids = obs_id_occurences.index.values[ - obs_id_occurences.values > 1 - ] + not_improved_mask = pc.equal(orbits_iter.improved, False) + orbits_out = orbits_iter.apply_mask(not_improved_mask) + orbit_members_out = orbit_members_iter.apply_mask( + pc.is_in(orbit_members_iter.orbit_id, orbits_out.orbit_id) + ) - logger.info( - "There are {} observations that appear in more than one orbit.".format( - len(duplicate_obs_ids) - ) + # If some of the orbits that haven't improved in their orbit fit still share observations + # then assign those observations to the orbit with the most observations, longest arc length, + # and lowest reduced chi2 (and remove the other instances of that observation) + # We will one final iteration of OD on the output orbits later + orbits_out, orbit_members_out = orbits_out.assign_duplicate_observations( + orbit_members_out ) - orbits_out, orbit_members_out = removeDuplicateObservations( - orbits_out, - orbit_members_out, - min_obs=min_obs, - linkage_id_col="orbit_id", - filter_cols=["num_obs", "arc_length", "r_sigma", "v_sigma"], - ascending=[False, False, True, True], + + # Add these orbits to the output list + odp_orbits_list.append(orbits_out) + odp_orbit_members_list.append(orbit_members_out) + + # Remove observations that have been added to the output list of orbits + observations_iter = observations_iter.apply_mask( + pc.invert( + pc.is_in( + observations_iter.detections.id, + orbit_members_out.obs_id.unique(), + ) + ) ) - observations_iter = observations_iter[ - ~observations_iter["obs_id"].isin(orbit_members_out["obs_id"].values) - ] - orbit_members_iter = orbit_members_iter[ - ~orbit_members_iter["orbit_id"].isin(orbits_out["orbit_id"].values) - ] - orbit_members_iter = orbit_members_iter[ - orbit_members_iter["obs_id"].isin(observations_iter["obs_id"].values) - ] - orbits_iter = orbits_iter[ - orbits_iter["orbit_id"].isin(orbit_members_iter["orbit_id"].unique()) - ] - orbit_members_iter = orbit_members_iter[["orbit_id", "obs_id"]] + # Identify the orbit that could still be improved more + improved_mask = pc.invert(not_improved_mask) + orbits_iter = orbits_iter.apply_mask(improved_mask) + orbit_members_iter = orbit_members_iter.apply_mask( + pc.is_in(orbit_members_iter.orbit_id, orbits_iter.orbit_id) + ) - odp_orbits_dfs.append(orbits_out) - odp_orbit_members_dfs.append(orbit_members_out) + # Remove observations that have been added to the output list of orbits + # from the orbits that we will continue iterating over + orbit_members_iter = orbit_members_iter.apply_mask( + pc.invert( + pc.is_in( + orbit_members_iter.obs_id, + orbit_members_out.obs_id.unique(), + ) + ) + ) + orbits_iter = orbits_iter.apply_mask( + pc.is_in(orbits_iter.orbit_id, orbit_members_iter.orbit_id.unique()) + ) iterations += 1 if len(orbits_iter) == 0: converged = True - odp_orbits = pd.concat(odp_orbits_dfs) - odp_orbit_members = pd.concat(odp_orbit_members_dfs) - - odp_orbits.drop(columns=["improved"], inplace=True) - odp_orbits, odp_orbit_members = sortLinkages( - odp_orbits, odp_orbit_members, observations, linkage_id_col="orbit_id" - ) + odp_orbits = qv.concatenate(odp_orbits_list) + odp_orbit_members = qv.concatenate(odp_orbit_members_list) + + if len(odp_orbits) > 0: + # Do one final iteration of OD on the output orbits. 
This + # will update any fits of orbits that might have had observations + # removed during the assign_duplicate_observations step + odp_orbits, odp_orbit_members = differential_correction( + odp_orbits, + odp_orbit_members, + observations, + rchi2_threshold=rchi2_threshold, + min_obs=min_obs, + min_arc_length=min_arc_length, + contamination_percentage=contamination_percentage, + delta=delta, + method=method, + max_iter=max_iter, + fit_epoch=fit_epoch, + propagator=propagator, + propagator_kwargs=propagator_kwargs, + chunk_size=orbits_chunk_size, + max_processes=max_processes, + ) + odp_orbit_members = odp_orbit_members.drop_outliers() else: - odp_orbits = pd.DataFrame( - columns=[ - "orbit_id", - "mjd_tdb", - "x", - "y", - "z", - "vx", - "vy", - "vz", - "covariance", - "arc_length", - "num_obs", - "chi2", - "rchi2", - ] - ) - - odp_orbit_members = pd.DataFrame( - columns=[ - "orbit_id", - "obs_id", - "residual_ra_arcsec", - "residual_dec_arcsec", - "chi2", - "outlier", - ] - ) + odp_orbits = FittedOrbits.empty() + odp_orbit_members = FittedOrbitMembers.empty() time_end = time.time() logger.info( - "Number of attribution / differential correction iterations: {}".format( - iterations - ) + f"Number of attribution / differential correction iterations: {iterations}" ) logger.info( - "Extended and/or merged {} orbits into {} orbits.".format( - len(orbits), len(odp_orbits) - ) + f"Extended and/or merged {len(orbits)} orbits into {len(odp_orbits)} orbits." ) logger.info( "Orbit extension and merging completed in {:.3f} seconds.".format( diff --git a/thor/orbits/gauss.py b/thor/orbits/gauss.py index b639bd2d..9932e4b3 100644 --- a/thor/orbits/gauss.py +++ b/thor/orbits/gauss.py @@ -1,4 +1,5 @@ import numpy as np +from adam_core.constants import Constants as c from adam_core.coordinates import ( CartesianCoordinates, Origin, @@ -7,11 +8,9 @@ ) from adam_core.orbits import Orbits from adam_core.time import Timestamp -from astropy.time import Time from numba import jit from numpy import roots -from ..constants import Constants as c from .gibbs import calcGibbs from .herrick_gibbs import calcHerrickGibbs from .iterators import iterateStateTransition diff --git a/thor/orbits/gibbs.py b/thor/orbits/gibbs.py index 0b815f4d..5b7be573 100644 --- a/thor/orbits/gibbs.py +++ b/thor/orbits/gibbs.py @@ -1,6 +1,5 @@ import numpy as np - -from ..constants import Constants as c +from adam_core.constants import Constants as c __all__ = ["calcGibbs"] diff --git a/thor/orbits/herrick_gibbs.py b/thor/orbits/herrick_gibbs.py index 54f8432e..fff3967c 100644 --- a/thor/orbits/herrick_gibbs.py +++ b/thor/orbits/herrick_gibbs.py @@ -1,6 +1,5 @@ import numpy as np - -from ..constants import Constants as c +from adam_core.constants import Constants as c __all__ = ["calcHerrickGibbs"] diff --git a/thor/orbits/iod.py b/thor/orbits/iod.py index 67fbe54d..39c8afa3 100644 --- a/thor/orbits/iod.py +++ b/thor/orbits/iod.py @@ -1,45 +1,32 @@ -import os -from typing import Literal - -os.environ["OMP_NUM_THREADS"] = "1" -os.environ["OPENBLAS_NUM_THREADS"] = "1" -os.environ["MKL_NUM_THREADS"] = "1" -os.environ["VECLIB_MAXIMUM_THREADS"] = "1" -os.environ["NUMEXPR_NUM_THREADS"] = "1" - -import concurrent.futures as cf import logging -import multiprocessing as mp import time -from functools import partial from itertools import combinations +from typing import Literal, Optional, Tuple, Type, Union import numpy as np -import pandas as pd +import numpy.typing as npt +import pyarrow as pa +import pyarrow.compute as pc import quivr as qv 
-from adam_core.observers import Observers -from adam_core.propagator import PYOORB -from adam_core.time import Timestamp -from astropy.time import Time - -from ..utils import ( - _checkParallel, - _initWorker, - calcChunkSize, - identifySubsetLinkages, - sortLinkages, - yieldChunks, -) +import ray +from adam_core.coordinates.residuals import Residuals +from adam_core.propagator import PYOORB, Propagator +from adam_core.propagator.utils import _iterate_chunks + +from ..clusters import ClusterMembers +from ..observations.observations import Observations +from ..orbit_determination.fitted_orbits import FittedOrbitMembers, FittedOrbits from .gauss import gaussIOD -from .residuals import calcResiduals -from .utils import _ephemeris_to_dataframe logger = logging.getLogger(__name__) -__all__ = ["selectObservations", "iod", "iod_worker", "initialOrbitDetermination"] +__all__ = ["initial_orbit_determination"] -def selectObservations(observations, method="combinations"): +def select_observations( + observations: Observations, + method: Literal["combinations", "first+middle+last", "thirds"] = "combinations", +) -> npt.NDArray[np.str_]: """ Selects which three observations to use for IOD depending on the method. @@ -64,12 +51,12 @@ def selectObservations(observations, method="combinations"): An array of selected observation IDs. If three unique observations could not be selected then returns an empty array. """ - obs_ids = observations["obs_id"].values + obs_ids = observations.detections.id.to_numpy(zero_copy_only=False) if len(obs_ids) < 3: return np.array([]) indexes = np.arange(0, len(obs_ids)) - times = observations["mjd_utc"].values + times = observations.detections.time.mjd().to_numpy(zero_copy_only=False) if method == "first+middle+last": selected_times = np.percentile(times, [0, 50, 100], interpolation="nearest") @@ -123,68 +110,86 @@ def selectObservations(observations, method="combinations"): def iod_worker( - observations_list, - observation_selection_method="combinations", - min_obs=6, - min_arc_length=1.0, - rchi2_threshold=10**3, - contamination_percentage=0.0, - iterate=False, - light_time=True, - linkage_id_col="cluster_id", - propagator: Literal["PYOORB"] = "PYOORB", + linkage_ids: npt.NDArray[np.str_], + observations: Union[Observations, ray.ObjectRef], + linkage_members: Union[ClusterMembers, FittedOrbitMembers, ray.ObjectRef], + min_obs: int = 6, + min_arc_length: float = 1.0, + contamination_percentage: float = 0.0, + rchi2_threshold: float = 200, + observation_selection_method: Literal[ + "combinations", "first+middle+last", "thirds" + ] = "combinations", + linkage_id_col: str = "cluster_id", + iterate: bool = False, + light_time: bool = True, + propagator: Type[Propagator] = PYOORB, propagator_kwargs: dict = {}, -): - iod_orbits_dfs = [] - iod_orbit_members_dfs = [] - for observations in observations_list: - assert np.all( - sorted(observations["mjd_utc"].values) == observations["mjd_utc"].values - ) +) -> Tuple[FittedOrbits, FittedOrbitMembers]: + + prop = propagator(**propagator_kwargs) + + iod_orbits_list = [] + iod_orbit_members_list = [] + for linkage_id in linkage_ids: time_start = time.time() - linkage_id = observations[linkage_id_col].unique()[0] logger.debug(f"Finding initial orbit for linkage {linkage_id}...") + obs_ids = linkage_members.apply_mask( + pc.equal(linkage_members.column(linkage_id_col), linkage_id) + ).obs_id + observations_linkage = observations.apply_mask( + pc.is_in(observations.detections.id, obs_ids) + ) + iod_orbit, iod_orbit_members = iod( - 
observations, - observation_selection_method=observation_selection_method, + observations_linkage, min_obs=min_obs, min_arc_length=min_arc_length, rchi2_threshold=rchi2_threshold, contamination_percentage=contamination_percentage, + observation_selection_method=observation_selection_method, iterate=iterate, light_time=light_time, - propagator=propagator, - propagator_kwargs=propagator_kwargs, + propagator=prop, ) if len(iod_orbit) > 0: - iod_orbit.insert(1, linkage_id_col, linkage_id) + iod_orbit = iod_orbit.set_column("orbit_id", pa.array([linkage_id])) + iod_orbit_members = iod_orbit_members.set_column( + "orbit_id", + pa.array([linkage_id for i in range(len(iod_orbit_members))]), + ) time_end = time.time() duration = time_end - time_start logger.debug(f"IOD for linkage {linkage_id} completed in {duration:.3f}s.") - iod_orbits_dfs.append(iod_orbit) - iod_orbit_members_dfs.append(iod_orbit_members) + iod_orbits_list.append(iod_orbit) + iod_orbit_members_list.append(iod_orbit_members) - iod_orbits = pd.concat(iod_orbits_dfs, ignore_index=True) - iod_orbit_members = pd.concat(iod_orbit_members_dfs, ignore_index=True) + iod_orbits = qv.concatenate(iod_orbits_list) + iod_orbit_members = qv.concatenate(iod_orbit_members_list) return iod_orbits, iod_orbit_members +iod_worker_remote = ray.remote(iod_worker) +iod_worker_remote.options(num_returns=1, num_cpus=1) + + def iod( - observations, - min_obs=6, - min_arc_length=1.0, - contamination_percentage=0.0, - rchi2_threshold=200, - observation_selection_method="combinations", - iterate=False, - light_time=True, - propagator: Literal["PYOORB"] = "PYOORB", - propagator_kwargs: dict = {}, -): + observations: Observations, + min_obs: int = 6, + min_arc_length: float = 1.0, + contamination_percentage: float = 0.0, + rchi2_threshold: float = 200, + observation_selection_method: Literal[ + "combinations", "first+middle+last", "thirds" + ] = "combinations", + iterate: bool = False, + light_time: bool = True, + propagator: Propagator = PYOORB(), +) -> Tuple[FittedOrbits, FittedOrbitMembers]: """ Run initial orbit determination on a set of observations believed to belong to a single object. 
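A minimal calling sketch for the refactored iod() signature shown above, mirroring what iod_worker does per linkage. It is only illustrative: `observations_linkage` is a placeholder for an Observations table containing the detections of a single linkage, and the direct module import is an assumption (iod is not re-exported in __all__).

from adam_core.propagator import PYOORB

from thor.orbits.iod import iod  # direct module import; only initial_orbit_determination is exported

# Fit an initial orbit to one linkage's observations
iod_orbit, iod_orbit_members = iod(
    observations_linkage,
    min_obs=6,
    min_arc_length=1.0,
    contamination_percentage=0.0,
    rchi2_threshold=200,
    observation_selection_method="combinations",
    iterate=False,
    light_time=True,
    propagator=PYOORB(),
)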
@@ -265,49 +270,12 @@ def iod( if len(observations) == 0: processable = False - # Extract column names - obs_id_col = "obs_id" - time_col = "mjd_utc" - ra_col = "RA_deg" - dec_col = "Dec_deg" - ra_err_col = "RA_sigma_deg" - dec_err_col = "Dec_sigma_deg" - obs_code_col = "observatory_code" - obs_x_col = "obs_x" - obs_y_col = "obs_y" - obs_z_col = "obs_z" - - # Extract observation IDs, sky-plane positions, sky-plane position uncertainties, times of observation, - # and the location of the observer at each time - obs_ids_all = observations[obs_id_col].values - coords_all = observations[[ra_col, dec_col]].values - sigmas_all = observations[[ra_err_col, dec_err_col]].values - coords_obs_all = observations[[obs_x_col, obs_y_col, obs_z_col]].values - times_all = observations[time_col].values - times_all = Time(times_all, scale="utc", format="mjd") - - observers_list = [] - for observatory_code in observations["observatory_code"].unique(): - observers_list.append( - Observers.from_code( - observatory_code, - Timestamp.from_mjd( - observations[ - observations["observatory_code"].isin([observatory_code]) - ]["mjd_utc"].unique(), - scale="utc", - ), - ) - ) - observers = qv.concatenate(observers_list) - observers = observers.sort_by( - ["coordinates.time.days", "coordinates.time.nanos", "code"] - ) - - if propagator == "PYOORB": - prop = PYOORB(**propagator_kwargs) - else: - raise ValueError(f"Invalid propagator '{propagator}'.") + obs_ids_all = observations.detections.id.to_numpy(zero_copy_only=False) + coords_all = observations.to_spherical_coordinates() + observers_with_states = observations.get_observers() + observers = observers_with_states.observers + coords_obs_all = observers_with_states.observers.coordinates.r + times_all = coords_all.time.mjd().to_numpy(zero_copy_only=False) chi2_sol = 1e10 orbit_sol = None @@ -322,7 +290,7 @@ def iod( num_outliers = np.maximum(np.minimum(num_obs - min_obs, num_outliers), 0) # Select observation IDs to use for IOD - obs_ids = selectObservations( + obs_ids = select_observations( observations, method=observation_selection_method, ) @@ -341,14 +309,14 @@ def iod( # Grab sky-plane positions of the selected observations, the heliocentric ecliptic position of the observer, # and the times at which the observations occur - coords = coords_all[mask, :] + coords = coords_all.apply_mask(mask) coords_obs = coords_obs_all[mask, :] times = times_all[mask] # Run IOD iod_orbits = gaussIOD( - coords, - times.utc.mjd, + coords.values[:, 1:3], + times, coords_obs, light_time=light_time, iterate=iterate, @@ -360,23 +328,22 @@ def iod( continue # Propagate initial orbit to all observation times - ephemeris = prop._generate_ephemeris(iod_orbits, observers) - ephemeris = _ephemeris_to_dataframe(ephemeris) + ephemeris = propagator.generate_ephemeris( + iod_orbits, observers, chunk_size=1, max_processes=1 + ) # For each unique initial orbit calculate residuals and chi-squared # Find the orbit which yields the lowest chi-squared orbit_ids = iod_orbits.orbit_id.to_numpy(zero_copy_only=False) for i, orbit_id in enumerate(orbit_ids): - ephemeris_orbit = ephemeris[ephemeris["orbit_id"] == orbit_id] + ephemeris_orbit = ephemeris.select("orbit_id", orbit_id) # Calculate residuals and chi2 - residuals, stats = calcResiduals( + residuals = Residuals.calculate( coords_all, - ephemeris_orbit[["RA_deg", "Dec_deg"]].values, - sigmas_actual=sigmas_all, - include_probabilistic=False, + ephemeris_orbit.coordinates, ) - chi2 = stats[0] + chi2 = residuals.chi2.to_numpy() chi2_total = np.sum(chi2) 
rchi2 = chi2_total / (2 * num_obs - 6) @@ -403,7 +370,7 @@ def iod( rchi2_sol = rchi2 residuals_sol = residuals outliers = np.array([]) - arc_length = times_all.utc.mjd.max() - times_all.utc.mjd.min() + arc_length = times_all.max() - times_all.min() converged = True break @@ -431,10 +398,7 @@ def iod( rchi2_new = chi2_new / (2 * num_obs_new - 6) ids_mask = np.isin(obs_ids_all, obs_id_outlier, invert=True) - arc_length = ( - times_all[ids_mask].utc.mjd.max() - - times_all[ids_mask].utc.mjd.min() - ) + arc_length = times_all[ids_mask].max() - times_all[ids_mask].min() # If the updated reduced chi2 total is lower than our desired # threshold, accept the soluton. If not, keep going. @@ -448,8 +412,7 @@ def iod( num_obs = num_obs_new ids_mask = np.isin(obs_ids_all, outliers, invert=True) arc_length = ( - times_all[ids_mask].utc.mjd.max() - - times_all[ids_mask].utc.mjd.min() + times_all[ids_mask].max() - times_all[ids_mask].min() ) chi2_sol = chi2 converged = True @@ -462,79 +425,49 @@ def iod( if not converged or not processable: - orbit = pd.DataFrame( - columns=[ - "orbit_id", - "mjd_tdb", - "x", - "y", - "z", - "vx", - "vy", - "vz", - "arc_length", - "num_obs", - "chi2", - "rchi2", - ] - ) + return FittedOrbits.empty(), FittedOrbitMembers.empty() - orbit_members = pd.DataFrame( - columns=[ - "orbit_id", - "obs_id", - "residual_ra_arcsec", - "residual_dec_arcsec", - "chi2", - "gauss_sol", - "outlier", - ] + else: + + orbit = FittedOrbits.from_kwargs( + orbit_id=orbit_sol.orbit_id, + object_id=orbit_sol.object_id, + coordinates=orbit_sol.coordinates, + arc_length=[arc_length], + num_obs=[num_obs], + chi2=[chi2_total_sol], + reduced_chi2=[rchi2_sol], ) - else: - orbit = orbit_sol.to_dataframe() - orbit["arc_length"] = arc_length - orbit["num_obs"] = num_obs - orbit["chi2"] = chi2_total_sol - orbit["rchi2"] = rchi2_sol - - orbit_members = pd.DataFrame( - { - "orbit_id": [ - orbit_sol.orbit_id[0].as_py() for i in range(len(obs_ids_all)) - ], - "obs_id": obs_ids_all, - "residual_ra_arcsec": residuals_sol[:, 0] * 3600, - "residual_dec_arcsec": residuals_sol[:, 1] * 3600, - "chi2": chi2_sol, - "gauss_sol": np.zeros(len(obs_ids_all), dtype=int), - "outlier": np.zeros(len(obs_ids_all), dtype=int), - } + orbit_members = FittedOrbitMembers.from_kwargs( + orbit_id=np.full(len(obs_ids_all), orbit_sol.orbit_id[0].as_py()), + obs_id=obs_ids_all, + residuals=residuals_sol, + solution=np.isin(obs_ids_all, obs_ids_sol), + outlier=np.isin(obs_ids_all, outliers), ) - orbit_members.loc[orbit_members["obs_id"].isin(outliers), "outlier"] = 1 - orbit_members.loc[orbit_members["obs_id"].isin(obs_ids_sol), "gauss_sol"] = 1 return orbit, orbit_members -def initialOrbitDetermination( - observations, - linkage_members, - min_obs=6, - min_arc_length=1.0, - contamination_percentage=20.0, - rchi2_threshold=10**3, - observation_selection_method="combinations", - iterate=False, - light_time=True, - linkage_id_col="cluster_id", - identify_subsets=True, - propagator: Literal["PYOORB"] = "PYOORB", +def initial_orbit_determination( + observations: Union[Observations, ray.ObjectRef], + linkage_members: Union[ClusterMembers, FittedOrbitMembers, ray.ObjectRef], + min_obs: int = 6, + min_arc_length: float = 1.0, + contamination_percentage: float = 20.0, + rchi2_threshold: float = 10**3, + observation_selection_method: Literal[ + "combinations", "first+middle+last", "thirds" + ] = "combinations", + iterate: bool = False, + light_time: bool = True, + linkage_id_col: str = "cluster_id", + propagator: Type[Propagator] = PYOORB, 
propagator_kwargs: dict = {}, - chunk_size=1, - num_jobs=1, - parallel_backend="cf", -): + chunk_size: int = 1, + max_processes: Optional[int] = 1, +) -> Tuple[FittedOrbits, FittedOrbitMembers]: """ Run initial orbit determination on linkages found in observations. @@ -624,226 +557,90 @@ def initialOrbitDetermination( time_start = time.time() logger.info("Running initial orbit determination...") + iod_orbits_list = [] + iod_orbit_members_list = [] if len(observations) > 0 and len(linkage_members) > 0: - iod_orbits_dfs = [] - iod_orbit_members_dfs = [] - - start = time.time() - logger.debug("Merging observations on linkage members...") - linked_observations = linkage_members.merge(observations, on="obs_id") - logger.debug("Sorting observations by linkage ID and mjd_utc...") - linked_observations.sort_values( - by=[linkage_id_col, "mjd_utc"], inplace=True, ignore_index=True - ) - duration = time.time() - start - logger.debug(f"Merging and sorting completed in {duration:.3f}s.") - - start = time.time() - logger.debug("Grouping observations by linkage ID...") - grouped_observations = linked_observations.groupby(by=[linkage_id_col]) - logger.debug("Splitting grouped observations by linkage ID...") - observations_split = [ - grouped_observations.get_group(g).reset_index(drop=True) - for g in grouped_observations.groups - ] - duration = time.time() - start - logger.debug(f"Grouping and splitting completed in {duration:.3f}s.") - - parallel, num_workers = _checkParallel(num_jobs, parallel_backend) - if parallel: + # Extract linkage IDs + linkage_ids = linkage_members.column(linkage_id_col).unique() - # The number of linkages that need to be fit for an initial orbit - num_linkages = linkage_members[linkage_id_col].nunique() + if max_processes is None or max_processes > 1: - if parallel_backend == "ray": + if not ray.is_initialized(): + ray.init(address="auto") - import ray + observations_ref = ray.put(observations) + linkage_members_ref = ray.put(linkage_members) - if not ray.is_initialized(): - ray.init(address="auto") - - iod_worker_ray = ray.remote(iod_worker) - iod_worker_ray = iod_worker_ray.options(num_returns=2, num_cpus=1) - - # Send up to chunk_size linkages to each IOD worker for processing - chunk_size_ = calcChunkSize( - num_linkages, num_workers, chunk_size, min_chunk_size=1 - ) - logger.info( - f"Distributing linkages in chunks of {chunk_size_} to {num_workers} ray workers." 
- ) - - # Put the observations into ray's local object storage ("plasma") - observation_oids = [] - for observations_i in yieldChunks(observations_split, chunk_size_): - observation_oids.append(ray.put(observations_i)) - - iod_orbits_oids = [] - iod_orbit_members_oids = [] - for observations_oid in observation_oids: - - iod_orbits_oid, iod_orbit_members_oid = iod_worker_ray.remote( - observations_oid, - observation_selection_method=observation_selection_method, - rchi2_threshold=rchi2_threshold, + futures = [] + for linkage_id_chunk in _iterate_chunks(linkage_ids, chunk_size): + futures.append( + iod_worker_remote.remote( + linkage_id_chunk, + observations_ref, + linkage_members_ref, min_obs=min_obs, min_arc_length=min_arc_length, contamination_percentage=contamination_percentage, - iterate=iterate, - light_time=light_time, - linkage_id_col=linkage_id_col, - propagator=propagator, - propagator_kwargs=propagator_kwargs, - ) - iod_orbits_oids.append(iod_orbits_oid) - iod_orbit_members_oids.append(iod_orbit_members_oid) - - iod_orbits_dfs = ray.get(iod_orbits_oids) - iod_orbit_members_dfs = ray.get(iod_orbit_members_oids) - - elif parallel_backend == "mp": - - chunk_size_ = calcChunkSize( - num_linkages, num_workers, chunk_size, min_chunk_size=1 - ) - logger.info( - f"Distributing linkages in chunks of {chunk_size_} to {num_workers} workers." - ) - - p = mp.Pool( - processes=num_workers, - initializer=_initWorker, - ) - - results = p.starmap( - partial( - iod_worker, - observation_selection_method=observation_selection_method, rchi2_threshold=rchi2_threshold, - min_obs=min_obs, - min_arc_length=min_arc_length, - contamination_percentage=contamination_percentage, + observation_selection_method=observation_selection_method, iterate=iterate, light_time=light_time, linkage_id_col=linkage_id_col, propagator=propagator, propagator_kwargs=propagator_kwargs, - ), - zip(yieldChunks(observations_split, chunk_size_)), + ) ) - p.close() - - results = list(zip(*results)) - iod_orbits_dfs = results[0] - iod_orbit_members_dfs = results[1] - - elif parallel_backend == "cf": - with cf.ProcessPoolExecutor( - max_workers=num_workers, initializer=_initWorker - ) as executor: - futures = [] - for observations_i in yieldChunks(observations_split, chunk_size): - futures.append( - executor.submit( - iod_worker, - observations_i, - observation_selection_method=observation_selection_method, - rchi2_threshold=rchi2_threshold, - min_obs=min_obs, - min_arc_length=min_arc_length, - contamination_percentage=contamination_percentage, - iterate=iterate, - light_time=light_time, - linkage_id_col=linkage_id_col, - propagator=propagator, - propagator_kwargs=propagator_kwargs, - ) - ) - iod_orbits_dfs = [] - iod_orbit_members_dfs = [] - for f in cf.as_completed(futures): - iod_orbits_df, iod_orbit_members_df = f.result() - iod_orbits_dfs.append(iod_orbits_df) - iod_orbit_members_dfs.append(iod_orbit_members_df) - else: - raise ValueError( - f"Unknown parallel backend: {parallel_backend}. Must be one of: 'ray', 'mp', 'cf'." 
- ) + while futures: + finished, futures = ray.wait(futures, num_returns=1) + result = ray.get(finished[0]) + iod_orbits_list.append(result[0]) + iod_orbit_members_list.append(result[1]) else: - - for observations_i in yieldChunks(observations_split, chunk_size): - iod_orbits_df, iod_orbit_members_df = iod_worker( - observations_i, - observation_selection_method=observation_selection_method, - rchi2_threshold=rchi2_threshold, + for linkage_id_chunk in _iterate_chunks(linkage_ids, chunk_size): + iod_orbits_chunk, iod_orbit_members_chunk = iod_worker( + linkage_id_chunk, + observations, + linkage_members, min_obs=min_obs, min_arc_length=min_arc_length, contamination_percentage=contamination_percentage, + rchi2_threshold=rchi2_threshold, + observation_selection_method=observation_selection_method, iterate=iterate, light_time=light_time, linkage_id_col=linkage_id_col, propagator=propagator, propagator_kwargs=propagator_kwargs, ) - iod_orbits_dfs.append(iod_orbits_df) - iod_orbit_members_dfs.append(iod_orbit_members_df) - - iod_orbits = pd.concat(iod_orbits_dfs, ignore_index=True) - iod_orbit_members = pd.concat(iod_orbit_members_dfs, ignore_index=True) - - for col in ["num_obs"]: - iod_orbits[col] = iod_orbits[col].astype(int) - for col in ["gauss_sol", "outlier"]: - iod_orbit_members[col] = iod_orbit_members[col].astype(int) + iod_orbits_list.append(iod_orbits_chunk) + iod_orbit_members_list.append(iod_orbit_members_chunk) + + iod_orbits = qv.concatenate(iod_orbits_list) + iod_orbit_members = qv.concatenate(iod_orbit_members_list) + + iod_orbits, iod_orbit_members = iod_orbits.drop_duplicates( + iod_orbit_members, + subset=[ + "coordinates.time.days", + "coordinates.time.nanos", + "coordinates.x", + "coordinates.y", + "coordinates.z", + "coordinates.vx", + "coordinates.vy", + "coordinates.vz", + ], + keep="first", + ) logger.info("Found {} initial orbits.".format(len(iod_orbits))) - if identify_subsets and len(iod_orbits) > 0: - iod_orbits, iod_orbit_members = identifySubsetLinkages( - iod_orbits, iod_orbit_members, linkage_id_col="orbit_id" - ) - logger.info( - "{} subset orbits identified.".format( - len(iod_orbits[~iod_orbits["subset_of"].isna()]) - ) - ) - - iod_orbits, iod_orbit_members = sortLinkages( - iod_orbits, iod_orbit_members, observations, linkage_id_col="orbit_id" - ) - else: - iod_orbits = pd.DataFrame( - columns=[ - "orbit_id", - "mjd_tdb", - "x", - "y", - "z", - "vx", - "vy", - "vz", - "arc_length", - "num_obs", - "chi2", - "rchi2", - ] - ) - - iod_orbit_members = pd.DataFrame( - columns=[ - "orbit_id", - "obs_id", - "residual_ra_arcsec", - "residual_dec_arcsec", - "chi2", - "gauss_sol", - "outlier", - ] - ) + iod_orbits = FittedOrbits.empty() + iod_orbit_members = FittedOrbitMembers.empty() time_end = time.time() logger.info( diff --git a/thor/orbits/iterators.py b/thor/orbits/iterators.py index d4e8b074..ece94ee3 100644 --- a/thor/orbits/iterators.py +++ b/thor/orbits/iterators.py @@ -1,7 +1,10 @@ import numpy as np -from adam_core import dynamics +from adam_core.constants import Constants as c +from adam_core.dynamics.lagrange import ( + apply_lagrange_coefficients, + calc_lagrange_coefficients, +) -from ..constants import Constants as c from .state_transition import calcStateTransitionMatrix __all__ = ["iterateStateTransition"] @@ -100,10 +103,10 @@ def iterateStateTransition( # differential equation: # d\chi / dt = \sqrt{mu} / r # and the corresponding state vector - lagrange_coeffs, stumpff_coeffs, chi = dynamics.calc_lagrange_coefficients( + lagrange_coeffs, 
stumpff_coeffs, chi = calc_lagrange_coefficients( r, v, dt, mu=mu, max_iter=max_iter, tol=tol ) - r_new, v_new = dynamics.apply_lagrange_coefficients(r, v, *lagrange_coeffs) + r_new, v_new = apply_lagrange_coefficients(r, v, *lagrange_coeffs) # Calculate the state transition matrix STM = calcStateTransitionMatrix( diff --git a/thor/orbits/od.py b/thor/orbits/od.py index eca015a2..e7fbc153 100644 --- a/thor/orbits/od.py +++ b/thor/orbits/od.py @@ -1,81 +1,62 @@ -import os -from typing import Literal - -os.environ["OMP_NUM_THREADS"] = "1" -os.environ["OPENBLAS_NUM_THREADS"] = "1" -os.environ["MKL_NUM_THREADS"] = "1" -os.environ["VECLIB_MAXIMUM_THREADS"] = "1" -os.environ["NUMEXPR_NUM_THREADS"] = "1" - -import concurrent.futures as cf -import copy import logging -import multiprocessing as mp import time -from functools import partial +from typing import Literal, Optional, Tuple import numpy as np -import pandas as pd +import numpy.typing as npt +import pyarrow.compute as pc import quivr as qv +import ray from adam_core.coordinates import CartesianCoordinates, CoordinateCovariances -from adam_core.observers import Observers +from adam_core.coordinates.residuals import Residuals from adam_core.orbits import Orbits -from adam_core.propagator import PYOORB -from adam_core.time import Timestamp +from adam_core.propagator import PYOORB, _iterate_chunks from scipy.linalg import solve -from ..utils import ( - _checkParallel, - _initWorker, - calcChunkSize, - sortLinkages, - yieldChunks, -) -from .residuals import calcResiduals -from .utils import _ephemeris_to_dataframe +from ..observations.observations import Observations +from ..orbit_determination import FittedOrbitMembers, FittedOrbits logger = logging.getLogger(__name__) -__all__ = ["od_worker", "od", "differentialCorrection"] +__all__ = ["differential_correction"] def od_worker( - orbits_list, - observations_list, - rchi2_threshold=100, - min_obs=5, - min_arc_length=1.0, - contamination_percentage=20, - delta=1e-6, - max_iter=20, - method="central", - fit_epoch=False, + orbit_ids: npt.NDArray[np.str_], + orbits: FittedOrbits, + orbit_members: FittedOrbitMembers, + observations: Observations, + rchi2_threshold: float = 100, + min_obs: int = 5, + min_arc_length: float = 1.0, + contamination_percentage: float = 0.0, + delta: float = 1e-6, + max_iter: int = 20, + method: Literal["central", "finite"] = "central", + fit_epoch: bool = False, propagator: Literal["PYOORB"] = "PYOORB", propagator_kwargs: dict = {}, -): - od_orbits_dfs = [] - od_orbit_members_dfs = [] - for orbit, observations in zip(orbits_list, observations_list): - try: - assert orbit.orbit_id[0].as_py() == observations["orbit_id"].unique()[0] - assert np.all( - sorted(observations["mjd_utc"].values) == observations["mjd_utc"].values - ) - assert len(np.unique(observations["mjd_utc"].values)) == len( - observations["mjd_utc"].values - ) - except: - err = ( - "Invalid observations and orbit have been passed to the OD code.\n" - "Orbit ID: {}".format(orbit.orbit_id[0].as_py()) - ) - raise ValueError(err) +) -> Tuple[FittedOrbits, FittedOrbitMembers]: + + od_orbits_list = [] + od_orbit_members_list = [] + + for orbit_id in orbit_ids: time_start = time.time() - logger.debug(f"Differentially correcting orbit {orbit.orbit_id[0].as_py()}...") + logger.debug(f"Differentially correcting orbit {orbit_id}...") + + orbit = orbits.select("orbit_id", orbit_id) + obs_ids = orbit_members.apply_mask( + pc.equal(orbit_members.orbit_id, orbit_id) + ).obs_id + orbit_observations = observations.apply_mask( 
+ pc.is_in(observations.detections.id, obs_ids) + ) + od_orbit, od_orbit_members = od( orbit, - observations, + orbit_observations, rchi2_threshold=rchi2_threshold, min_obs=min_obs, min_arc_length=min_arc_length, @@ -89,32 +70,35 @@ def od_worker( ) time_end = time.time() duration = time_end - time_start - logger.debug( - f"OD for orbit {orbit.orbit_id[0].as_py()} completed in {duration:.3f}s." - ) + logger.debug(f"OD for orbit {orbit_id} completed in {duration:.3f}s.") - od_orbits_dfs.append(od_orbit) - od_orbit_members_dfs.append(od_orbit_members) + od_orbits_list.append(od_orbit) + od_orbit_members_list.append(od_orbit_members) - od_orbits = pd.concat(od_orbits_dfs, ignore_index=True) - od_orbit_members = pd.concat(od_orbit_members_dfs, ignore_index=True) + od_orbits = qv.concatenate(od_orbits_list) + od_orbit_members = qv.concatenate(od_orbit_members_list) return od_orbits, od_orbit_members +od_worker_remote = ray.remote(od_worker) +od_worker_remote.options(num_returns=1, num_cpus=1) + + def od( - orbit, - observations, - rchi2_threshold=100, - min_obs=5, - min_arc_length=1.0, - contamination_percentage=0.0, - delta=1e-6, - max_iter=20, - method="central", - fit_epoch=False, + orbit: FittedOrbits, + observations: Observations, + rchi2_threshold: float = 100, + min_obs: int = 5, + min_arc_length: float = 1.0, + contamination_percentage: float = 0.0, + delta: float = 1e-6, + max_iter: int = 20, + method: Literal["central", "finite"] = "central", + fit_epoch: bool = False, propagator: Literal["PYOORB"] = "PYOORB", propagator_kwargs: dict = {}, -): +) -> Tuple[FittedOrbits, FittedOrbitMembers]: + if propagator == "PYOORB": prop = PYOORB(**propagator_kwargs) else: @@ -124,30 +108,12 @@ def od( err = "method should be one of 'central' or 'finite'." raise ValueError(err) - observables = ["RA_deg", "Dec_deg"] - - obs_ids_all = observations["obs_id"].values - coords = observations[observables].values - coords_sigma = observations[["RA_sigma_deg", "Dec_sigma_deg"]].values - - # Create Observers table - observers_list = [] - for observatory_code in observations["observatory_code"].unique(): - observers_list.append( - Observers.from_code( - observatory_code, - Timestamp.from_mjd( - observations[ - observations["observatory_code"].isin([observatory_code]) - ]["mjd_utc"].unique(), - scale="utc", - ), - ) - ) - observers = qv.concatenate(observers_list) - observers = observers.sort_by( - ["coordinates.time.days", "coordinates.time.nanos", "code"] - ) + obs_ids_all = observations.detections.id.to_numpy(zero_copy_only=False) + coords = observations.to_spherical_coordinates() + coords_sigma = coords.covariance.sigmas[:, 1:3] + observers_with_states = observations.get_observers() + observers = observers_with_states.observers + times_all = coords.time.mjd().to_numpy(zero_copy_only=False) # FLAG: can we stop iterating to find a solution? 
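The iteration that follows is a batch least-squares corrector: each state component is perturbed in turn, a design matrix is built from central (or forward) differences of the predicted observations, the rows are weighted by the inverse measurement variances, and the normal equations are solved for a state correction. A standalone sketch of one such update with a generic model function (the names here are illustrative, not part of this module):

    import numpy as np
    from scipy.linalg import solve

    def correction_step(state, model, obs, sigmas, delta=1e-6):
        """One Gauss-Newton update: solve (A^T W A) dx = A^T W b for dx."""
        residuals = obs - model(state)                # b, shape (N, 2)
        num_obs, num_params = len(obs), len(state)
        A = np.zeros((num_obs, 2, num_params))        # d(prediction) / d(state)
        for i in range(num_params):
            d = np.zeros(num_params)
            d[i] = delta
            # Central difference in parameter i
            A[:, :, i] = (model(state + d) - model(state - d)) / (2 * delta)
        ATWA = np.zeros((num_params, num_params))
        ATWb = np.zeros(num_params)
        for n in range(num_obs):
            W = np.diag(1.0 / sigmas[n] ** 2)         # weight by 1 / sigma^2
            ATWA += A[n].T @ W @ A[n]
            ATWb += A[n].T @ W @ residuals[n]
        return state + solve(ATWA, ATWb)              # repeat until rchi2 stops improving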
converged = False @@ -175,20 +141,23 @@ def od( # Calculate chi2 for residuals on the given observations # for the current orbit, the goal is for the orbit to improve # such that the chi2 improves - orbit_prev_ = copy.deepcopy(orbit) + orbit_prev_ = orbit.to_orbits() - ephemeris_prev_ = prop._generate_ephemeris(orbit_prev_, observers) - ephemeris_prev_ = _ephemeris_to_dataframe(ephemeris_prev_) - # TODO: replace with newer types + ephemeris_prev_ = prop.generate_ephemeris( + orbit_prev_, observers, chunk_size=1, max_processes=1 + ) - residuals_prev_, stats_prev_ = calcResiduals( + # Calculate residuals and chi2 + residuals_prev_ = Residuals.calculate( coords, - ephemeris_prev_[observables].values, - sigmas_actual=coords_sigma, - include_probabilistic=False, + ephemeris_prev_.coordinates, ) + residuals_prev_array = np.stack( + residuals_prev_.values.to_numpy(zero_copy_only=False) + )[:, 1:3] + num_obs_ = len(observations) - chi2_prev_ = stats_prev_[0] + chi2_prev_ = residuals_prev_.chi2.to_numpy() chi2_total_prev_ = np.sum(chi2_prev_) rchi2_prev_ = np.sum(chi2_prev_) / (2 * num_obs - 6) @@ -203,7 +172,7 @@ def od( rchi2_prev = rchi2_prev_ ids_mask = np.array([True for i in range(num_obs)]) - times_all = ephemeris_prev["mjd_utc"].values + times_all = ephemeris_prev.coordinates.time.mjd().to_numpy() obs_id_outlier = [] delta_prev = delta iterations = 0 @@ -250,16 +219,14 @@ def od( else: num_params = 6 - A = np.zeros((coords.shape[1], num_params, num_obs)) + A = np.zeros((2, num_params, num_obs)) ATWA = np.zeros((num_params, num_params, num_obs)) ATWb = np.zeros((num_params, 1, num_obs)) # Generate ephemeris with current nominal orbit - ephemeris_nom = prop._generate_ephemeris(orbit_prev, observers) - ephemeris_nom = _ephemeris_to_dataframe(ephemeris_nom) - # TODO: replace with newer types - - coords_nom = ephemeris_nom[observables].values + ephemeris_nom = prop.generate_ephemeris( + orbit_prev, observers, chunk_size=1, max_processes=1 + ) # Modify each component of the state by a small delta d = np.zeros((1, 7)) @@ -301,10 +268,9 @@ def od( ) # Calculate the modified ephemerides - ephemeris_mod_p = prop._generate_ephemeris(orbit_iter_p, observers) - ephemeris_mod_p = _ephemeris_to_dataframe(ephemeris_mod_p) - # TODO: replace with newer types - coords_mod_p = ephemeris_mod_p[observables].values + ephemeris_mod_p = prop.generate_ephemeris( + orbit_iter_p, observers, chunk_size=1, max_processes=1 + ) delta_denom = d[0, i] if method == "central": @@ -326,27 +292,28 @@ def od( ) # Calculate the modified ephemerides - ephemeris_mod_n = prop._generate_ephemeris(orbit_iter_n, observers) - ephemeris_mod_n = _ephemeris_to_dataframe(ephemeris_mod_n) - # TODO: replace with newer types - coords_mod_n = ephemeris_mod_n[observables].values + ephemeris_mod_n = prop.generate_ephemeris( + orbit_iter_n, observers, chunk_size=1, max_processes=1 + ) delta_denom *= 2 else: - coords_mod_n = coords_nom + ephemeris_mod_n = ephemeris_nom - residuals_mod, _ = calcResiduals( - coords_mod_p, - coords_mod_n, - sigmas_actual=None, - include_probabilistic=False, + residuals_mod = Residuals.calculate( + ephemeris_mod_p.coordinates, + ephemeris_mod_n.coordinates, ) + residuals_mod = np.stack( + residuals_mod.values.to_numpy(zero_copy_only=False) + ) + residuals_mod_array = residuals_mod[:, 1:3] for n in range(num_obs): try: A[:, i : i + 1, n] = ( - residuals_mod[ids_mask][n : n + 1].T / delta_denom + residuals_mod_array[ids_mask][n : n + 1].T / delta_denom ) except RuntimeError: print(orbit_prev.orbit_id) @@ -354,7 +321,7 @@ 
def od( for n in range(num_obs): W = np.diag(1 / coords_sigma[n] ** 2) ATWA[:, :, n] = A[:, :, n].T @ W @ A[:, :, n] - ATWb[:, :, n] = A[:, :, n].T @ W @ residuals_prev[n : n + 1].T + ATWb[:, :, n] = A[:, :, n].T @ W @ residuals_prev_array[n : n + 1].T ATWA = np.sum(ATWA, axis=2) ATWb = np.sum(ATWb, axis=2) @@ -444,14 +411,12 @@ def od( continue # Generate ephemeris with current nominal orbit - ephemeris_iter = prop._generate_ephemeris(orbit_iter, observers) - ephemeris_iter = _ephemeris_to_dataframe(ephemeris_iter) - coords_iter = ephemeris_iter[observables].values - - residuals, stats = calcResiduals( - coords, coords_iter, sigmas_actual=coords_sigma, include_probabilistic=False + ephemeris_iter = prop.generate_ephemeris( + orbit_iter, observers, chunk_size=1, max_processes=1 ) - chi2_iter = stats[0] + + residuals = Residuals.calculate(coords, ephemeris_iter.coordinates) + chi2_iter = residuals.chi2.to_numpy() chi2_total_iter = np.sum(chi2_iter[ids_mask]) rchi2_iter = chi2_total_iter / (2 * num_obs - num_params) arc_length = times_all[ids_mask].max() - times_all[ids_mask].min() @@ -541,94 +506,58 @@ def od( if not solution_found or not processable or first_solution: - od_orbit = pd.DataFrame( - columns=[ - "orbit_id", - "mjd_tdb", - "x", - "y", - "z", - "vx", - "vy", - "vz", - "covariance", - "r", - "r_sigma", - "v", - "v_sigma", - "arc_length", - "num_obs", - "num_params", - "num_iterations", - "chi2", - "rchi2", - "improved", - ] - ) + od_orbit = FittedOrbits.empty() + od_orbit_members = FittedOrbitMembers.empty() - od_orbit_members = pd.DataFrame( - columns=[ - "orbit_id", - "obs_id", - "residual_ra_arcsec", - "residual_dec_arcsec", - "chi2", - "outlier", - ] + else: + + obs_times = observations.detections.time.mjd().to_numpy()[ids_mask] + arc_length_ = obs_times.max() - obs_times.min() + assert arc_length == arc_length_ + + od_orbit = FittedOrbits.from_kwargs( + orbit_id=orbit_prev.orbit_id, + object_id=orbit_prev.object_id, + coordinates=orbit_prev.coordinates, + arc_length=[arc_length_], + num_obs=[num_obs], + chi2=[chi2_total_prev], + reduced_chi2=[rchi2_prev], + improved=[improved], ) - else: - obs_times = observations["mjd_utc"].values[ids_mask] - od_orbit = orbit_prev.to_dataframe() - od_orbit["r"] = orbit_prev.coordinates.r_mag - od_orbit["r_sigma"] = orbit_prev.coordinates.sigma_r_mag - od_orbit["v"] = orbit_prev.coordinates.v_mag - od_orbit["v_sigma"] = orbit_prev.coordinates.sigma_v_mag - od_orbit["arc_length"] = np.max(obs_times) - np.min(obs_times) - od_orbit["num_obs"] = num_obs - od_orbit["num_params"] = num_params - od_orbit["num_iterations"] = iterations - od_orbit["chi2"] = chi2_total_prev - od_orbit["rchi2"] = rchi2_prev - od_orbit["improved"] = improved - - od_orbit_members = pd.DataFrame( - { - "orbit_id": [ - orbit_prev.orbit_id[0].as_py() for i in range(len(obs_ids_all)) - ], - "obs_id": obs_ids_all, - "residual_ra_arcsec": residuals_prev[:, 0] * 3600, - "residual_dec_arcsec": residuals_prev[:, 1] * 3600, - "chi2": chi2_prev, - "outlier": np.zeros(len(obs_ids_all), dtype=int), - } + # od_orbit["num_params"] = num_params + # od_orbit["num_iterations"] = iterations + # od_orbit["improved"] = improved + + od_orbit_members = FittedOrbitMembers.from_kwargs( + orbit_id=np.full(len(obs_ids_all), orbit_prev.orbit_id[0].as_py()), + obs_id=obs_ids_all, + residuals=residuals_prev, + solution=np.isin(obs_ids_all, obs_id_outlier, invert=True), + outlier=np.isin(obs_ids_all, obs_id_outlier), ) - od_orbit_members.loc[ - od_orbit_members["obs_id"].isin(obs_id_outlier), 
"outlier" - ] = 1 return od_orbit, od_orbit_members -def differentialCorrection( - orbits, - orbit_members, - observations, - min_obs=5, - min_arc_length=1.0, - contamination_percentage=20, - rchi2_threshold=100, - delta=1e-8, - max_iter=20, - method="central", - fit_epoch=False, +def differential_correction( + orbits: FittedOrbits, + orbit_members: FittedOrbitMembers, + observations: Observations, + min_obs: int = 5, + min_arc_length: float = 1.0, + contamination_percentage: float = 20, + rchi2_threshold: float = 100, + delta: float = 1e-8, + max_iter: int = 20, + method: Literal["central", "finite"] = "central", + fit_epoch: bool = False, propagator: Literal["PYOORB"] = "PYOORB", propagator_kwargs: dict = {}, - chunk_size=10, - num_jobs=60, - parallel_backend="cf", -): + chunk_size: int = 10, + max_processes: Optional[int] = 1, +) -> Tuple[FittedOrbits, FittedOrbitMembers]: """ Differentially correct (via finite/central differencing). @@ -648,103 +577,27 @@ def differentialCorrection( if len(orbits) > 0 and len(orbit_members) > 0: - orbits_, orbit_members_ = sortLinkages(orbits, orbit_members, observations) + orbit_ids = orbits.orbit_id.to_numpy(zero_copy_only=False) - start = time.time() - logger.debug("Merging observations on linkage members...") - linked_observations = orbit_members_[ - orbit_members_[["orbit_id", "obs_id"]]["orbit_id"].isin( - orbits_["orbit_id"].values - ) - ].merge(observations, on="obs_id", how="left") - duration = time.time() - start - logger.debug(f"Merging completed in {duration:.3f}s.") - - start = time.time() - logger.debug("Grouping observations by orbit ID...") - grouped_observations = linked_observations.groupby(by=["orbit_id"]) - logger.debug("Splitting grouped observations by orbit ID...") - observations_split = [ - grouped_observations.get_group(g).reset_index(drop=True) - for g in grouped_observations.groups - ] - duration = time.time() - start - logger.debug(f"Grouping and splitting completed in {duration:.3f}s.") - - orbits_initial = Orbits.from_flat_dataframe(orbits_) - orbits_split = [orbit for orbit in orbits_initial] - num_orbits = len(orbits) - - parallel, num_workers = _checkParallel(num_jobs, parallel_backend) - if num_workers > 1: - - if parallel_backend == "ray": - import ray - - if not ray.is_initialized(): - ray.init(address="auto") - - od_worker_ray = ray.remote(od_worker) - od_worker_ray = od_worker_ray.options(num_returns=2, num_cpus=1) - - # Send up to chunk_size orbits to each OD worker for processing - chunk_size_ = calcChunkSize( - num_orbits, num_workers, chunk_size, min_chunk_size=1 - ) - logger.info( - f"Distributing linkages in chunks of {chunk_size_} to {num_workers} ray workers." 
- ) + od_orbits_list = [] + od_orbit_members_list = [] + if max_processes is None or max_processes > 1: - # Put the observations and orbits into ray's local object storage ("plasma") - orbit_oids = [] - observation_oids = [] - for orbits_i, observations_i in zip( - yieldChunks(orbits_split, chunk_size_), - yieldChunks(observations_split, chunk_size_), - ): - orbit_oids.append(ray.put(orbits_i)) - observation_oids.append(ray.put(observations_i)) - - od_orbits_oids = [] - od_orbit_members_oids = [] - for orbits_oid, observations_oid in zip(orbit_oids, observation_oids): - - od_orbits_oid, od_orbit_members_oid = od_worker_ray.remote( - orbits_oid, - observations_oid, - rchi2_threshold=rchi2_threshold, - min_obs=min_obs, - min_arc_length=min_arc_length, - contamination_percentage=contamination_percentage, - delta=delta, - max_iter=max_iter, - method=method, - fit_epoch=fit_epoch, - propagator=propagator, - propagator_kwargs=propagator_kwargs, - ) - od_orbits_oids.append(od_orbits_oid) - od_orbit_members_oids.append(od_orbit_members_oid) + if not ray.is_initialized(): + ray.init(address="auto") - od_orbits_dfs = ray.get(od_orbits_oids) - od_orbit_members_dfs = ray.get(od_orbit_members_oids) + orbits_ref = ray.put(orbits) + orbit_members_ref = ray.put(orbit_members) + observations_ref = ray.put(observations) - elif parallel_backend == "mp": - - chunk_size_ = calcChunkSize( - num_orbits, num_workers, chunk_size, min_chunk_size=1 - ) - logger.info( - f"Distributing linkages in chunks of {chunk_size_} to {num_workers} workers." - ) - - p = mp.Pool( - processes=num_workers, - initializer=_initWorker, - ) - results = p.starmap( - partial( - od_worker, + futures = [] + for orbit_ids_chunk in _iterate_chunks(orbit_ids, chunk_size): + futures.append( + od_worker_remote.remote( + orbit_ids_chunk, + orbits_ref, + orbit_members_ref, + observations_ref, rchi2_threshold=rchi2_threshold, min_obs=min_obs, min_arc_length=min_arc_length, @@ -755,68 +608,23 @@ def differentialCorrection( fit_epoch=fit_epoch, propagator=propagator, propagator_kwargs=propagator_kwargs, - ), - zip( - yieldChunks(orbits_split, chunk_size_), - yieldChunks(observations_split, chunk_size_), - ), + ) ) - p.close() - - results = list(zip(*results)) - od_orbits_dfs = results[0] - od_orbit_members_dfs = results[1] - - elif parallel_backend == "cf": - with cf.ProcessPoolExecutor( - max_workers=num_workers, initializer=_initWorker - ) as executor: - futures = [] - for orbits_i, observations_i in zip( - yieldChunks(orbits_split, chunk_size), - yieldChunks(observations_split, chunk_size), - ): - futures.append( - executor.submit( - od_worker, - orbits_i, - observations_i, - rchi2_threshold=rchi2_threshold, - min_obs=min_obs, - min_arc_length=min_arc_length, - contamination_percentage=contamination_percentage, - delta=delta, - max_iter=max_iter, - method=method, - fit_epoch=fit_epoch, - propagator=propagator, - propagator_kwargs=propagator_kwargs, - ) - ) - od_orbits_dfs = [] - od_orbit_members_dfs = [] - for future in cf.as_completed(futures): - od_orbits_df, od_orbit_members_df = future.result() - od_orbits_dfs.append(od_orbits_df) - od_orbit_members_dfs.append(od_orbit_members_df) - else: - raise ValueError( - f"Unknown parallel backend: {parallel_backend}. Must be one of: 'ray', 'mp', 'cf'." 
- ) + while futures: + finished, futures = ray.wait(futures, num_returns=1) + results = ray.get(finished[0]) + od_orbits_list.append(results[0]) + od_orbit_members_list.append(results[1]) else: - od_orbits_dfs = [] - od_orbit_members_dfs = [] - for orbits_i, observations_i in zip( - yieldChunks(orbits_split, chunk_size), - yieldChunks(observations_split, chunk_size), - ): - - od_orbits_df, od_orbit_members_df = od_worker( - orbits_i, - observations_i, + for orbit_ids_chunk in _iterate_chunks(orbit_ids, chunk_size): + od_orbits_chunk, od_orbit_members_chunk = od_worker( + orbit_ids_chunk, + orbits, + orbit_members, + observations, rchi2_threshold=rchi2_threshold, min_obs=min_obs, min_arc_length=min_arc_length, @@ -828,54 +636,15 @@ def differentialCorrection( propagator=propagator, propagator_kwargs=propagator_kwargs, ) - od_orbits_dfs.append(od_orbits_df) - od_orbit_members_dfs.append(od_orbit_members_df) - - od_orbits = pd.concat(od_orbits_dfs, ignore_index=True) - od_orbit_members = pd.concat(od_orbit_members_dfs, ignore_index=True) + od_orbits_list.append(od_orbits_chunk) + od_orbit_members_list.append(od_orbit_members_chunk) - for col in ["num_obs"]: - od_orbits[col] = od_orbits[col].astype(int) - for col in ["outlier"]: - od_orbit_members[col] = od_orbit_members[col].astype(int) - - od_orbits, od_orbit_members = sortLinkages( - od_orbits, od_orbit_members, observations, linkage_id_col="orbit_id" - ) + od_orbits = qv.concatenate(od_orbits_list) + od_orbit_members = qv.concatenate(od_orbit_members_list) else: - od_orbits = pd.DataFrame( - columns=[ - "orbit_id", - "mjd_tdb", - "x", - "y", - "z", - "vx", - "vy", - "vz", - "covariance", - "r", - "r_sigma", - "v", - "v_sigma", - "arc_length", - "num_obs", - "chi2", - "rchi2", - ] - ) - - od_orbit_members = pd.DataFrame( - columns=[ - "orbit_id", - "obs_id", - "residual_ra_arcsec", - "residual_dec_arcsec", - "chi2", - "outlier", - ] - ) + od_orbits = FittedOrbits.empty() + od_orbit_members = FittedOrbitMembers.empty() time_end = time.time() logger.info("Differentially corrected {} orbits.".format(len(od_orbits))) diff --git a/thor/orbits/residuals.py b/thor/orbits/residuals.py deleted file mode 100644 index 29d227df..00000000 --- a/thor/orbits/residuals.py +++ /dev/null @@ -1,191 +0,0 @@ -import numpy as np -from scipy.spatial.distance import mahalanobis -from scipy.stats import chi2 - -__all__ = ["calcResiduals", "calcSimpleResiduals", "calcProbabilisticResiduals"] - - -def calcResiduals( - coords_actual, - coords_desired, - sigmas_actual=None, - covariances_actual=None, - include_probabilistic=True, -): - """ - Calculate residuals (actual - desired) and the associated chi2 if the - 1-sigma uncertainties on the actual quantities are provided. If desired, - a probabilistic measure of residual can also be calculated using the 1-sigma - uncertainties (which are converted into a covariance matrix) or by providing the - coveriance matrices for each coordinate. The probabilistic statistics include - the chi2 for each measurement, the mahalanobis distance and the associated - probability (calculated as 1 - chi2.cdf(mahalanobis_distance, degrees of freedom)). - - Parameters - ---------- - coords_actual : `~numpy.ndarray` (N, M) - Actual N coordinates in M dimensions. - coords_desired : `~numpy.ndarray` (N, M) - The desired N coordinates in M dimensions. - sigmas_actual : `~numpy.ndarray` (N, M), optional - The 1-sigma uncertainties of the actual coordinates. Can be - None, in which case chi2 will be return as a NaN. 
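The parallel path above (and the equivalent one in initial_orbit_determination) follows a single pattern: put the large tables into ray's object store once, submit one task per chunk of IDs, then drain the futures with ray.wait as they complete. A self-contained sketch of that pattern with a toy worker and toy data (nothing here is THOR's own API):

    import numpy as np
    import ray

    @ray.remote
    def process_chunk(ids_chunk, data):
        # Toy stand-in for iod_worker / od_worker: one result per ID in the chunk
        return {i: float(np.sum(data[i])) for i in ids_chunk}

    if not ray.is_initialized():
        ray.init(num_cpus=2)

    data = {i: np.arange(10) * i for i in range(100)}
    data_ref = ray.put(data)            # shared once, not serialized per task
    ids = list(range(100))
    chunk_size = 25

    futures = [
        process_chunk.remote(ids[i : i + chunk_size], data_ref)
        for i in range(0, len(ids), chunk_size)
    ]

    results = {}
    while futures:                      # gather results in completion order
        finished, futures = ray.wait(futures, num_returns=1)
        results.update(ray.get(finished[0]))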
- covariances_actual : list of N `~numpy.ndarray`s (M, M) - The covariance matrix in M dimensions for each - actual observation if available. - include_probabilistic : bool, optional - Include the calculation of mahalanobis distance and the associated - probability. - - Returns - ------- - residuals : `~numpy.ndarray` (N, 2) - Residuals for each coordinate (actual - desired) - stats : tuple (1 or 3) - chi2 : `~numpy.ndarray` (N) - Chi-squared for each observation given the 1-sigma - uncertainties. NaN if no sigmas_actual is None. - p : `~numpy.ndarray` (N) - The probability that the actual coordinates given their uncertainty - belong to the same multivariate normal distribution as the desired - coordinates. - d : `~numpy.ndarray` (N) - The Mahalanobis distance of each coordinate compared to the desired - coordinates. - """ - if ( - covariances_actual is None - and sigmas_actual is not None - and include_probabilistic - ): - covariances_actual_ = [np.diag(i**2) for i in sigmas_actual] - sigmas_actual_ = sigmas_actual - elif covariances_actual is not None and sigmas_actual is None: - sigmas_actual_ = np.zeros_like(coords_actual) - for i in range(len(coords_actual)): - sigmas_actual_[i] = np.diagonal(covariances_actual[i]) - sigmas_actual_ = np.sqrt(sigmas_actual_) - else: - covariances_actual_ = covariances_actual - sigmas_actual_ = sigmas_actual - - residuals, chi2 = calcSimpleResiduals( - coords_actual, coords_desired, sigmas_actual=sigmas_actual_ - ) - - if include_probabilistic: - p, d = calcProbabilisticResiduals( - coords_actual, coords_desired, covariances_actual_ - ) - stats = (chi2, p, d) - else: - - stats = (chi2,) - - return residuals, stats - - -def calcSimpleResiduals( - coords_actual, - coords_desired, - sigmas_actual=None, -): - """ - Calculate residuals and the associated chi2 if the - 1-sigma uncertainties on the actual quantities are provided. - - Parameters - ---------- - coords_actual : `~numpy.ndarray` (N, M) - Actual N coordinates in M dimensions. - coords_desired : `~numpy.ndarray` (N, M) - The desired N coordinates in M dimensions. - sigmas_actual : `~numpy.ndarray` (N, M), optional - The 1-sigma uncertainties of the actual coordinates. Can be - None, in which case chi2 will be return as a NaN. - - Returns - ------- - residuals : `~numpy.ndarray` (N, 2) - Residuals for each coordinate (actual - desired) - chi2 : `~numpy.ndarray` (N) - Chi-squared for each observation given the 1-sigma - uncertainties. NaN if no sigmas_actual is None. - """ - residuals = np.zeros_like(coords_actual) - - ra = coords_actual[:, 0] - dec = coords_actual[:, 1] - ra_pred = coords_desired[:, 0] - dec_pred = coords_desired[:, 1] - - # Calculate residuals in RA, make sure to fix any potential wrap around errors - residual_ra = ra - ra_pred - residual_ra = np.where(residual_ra > 180.0, 360.0 - residual_ra, residual_ra) - residual_ra *= np.cos(np.radians(dec_pred)) - - # Calculate residuals in Dec - residual_dec = dec - dec_pred - - if isinstance(sigmas_actual, np.ndarray): - sigma_ra = sigmas_actual[:, 0] - sigma_dec = sigmas_actual[:, 1] - - # Calculate chi2 - chi2 = (residual_ra**2 / sigma_ra**2) + (residual_dec**2 / sigma_dec**2) - else: - chi2 = np.NaN - - residuals[:, 0] = residual_ra - residuals[:, 1] = residual_dec - - return residuals, chi2 - - -def calcProbabilisticResiduals(coords_actual, coords_desired, covariances_actual): - """ - Calculate the probabilistic residual. - - Parameters - ---------- - coords_actual : `~numpy.ndarray` (N, M) - Actual N coordinates in M dimensions. 
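The RA handling in the removed calcSimpleResiduals above is the easy part to get wrong: the raw difference near the 0/360 degree boundary is huge until the wrap is folded back, and the result is then scaled by cos(Dec). A small worked check that mirrors that logic:

    import numpy as np

    ra, dec = 359.9999, 45.0            # actual position, degrees
    ra_pred, dec_pred = 0.0001, 45.0    # predicted position, degrees

    residual_ra = ra - ra_pred                                   # 359.9998 deg
    residual_ra = np.where(residual_ra > 180.0, 360.0 - residual_ra, residual_ra)
    residual_ra = residual_ra * np.cos(np.radians(dec_pred))     # scale by cos(Dec)

    print(residual_ra * 3600)           # ~0.51 arcsec instead of ~10^6 arcsec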
- coords_desired : `~numpy.ndarray` (N, M) - The desired N coordinates in M dimensions. - sigmas_actual : `~numpy.ndarray` (N, M) - The 1-sigma uncertainties of the actual coordinates. - covariances_actual : list of N `~numpy.ndarray`s (M, M) - The covariance matrix in M dimensions for each - actual observation if available. - - Returns - ------- - p : `~numpy.ndarray` (N) - The probability that the actual coordinates given their uncertainty - belong to the same multivariate normal distribution as the desired - coordinates. - d : `~numpy.ndarray` (N) - The Mahalanobis distance of each coordinate compared to the desired - coordinates. - """ - d = np.zeros(len(coords_actual)) - p = np.zeros(len(coords_actual)) - - for i, (actual, desired, covar) in enumerate( - zip(coords_actual, coords_desired, covariances_actual) - ): - # Calculate the degrees of freedom - k = len(actual) - - # Calculate the mahalanobis distance between the two coordinates - d_i = mahalanobis(actual, desired, np.linalg.inv(covar)) - - # Calculate the probability that both sets of coordinates are drawn from - # the same multivariate normal - p_i = 1 - chi2.cdf(d_i, k) - - # Append results - d[i] = d_i - p[i] = p_i - - return p, d diff --git a/thor/orbits/state_transition.py b/thor/orbits/state_transition.py index 494ffcc8..d4df5e20 100644 --- a/thor/orbits/state_transition.py +++ b/thor/orbits/state_transition.py @@ -1,7 +1,6 @@ import numpy as np from adam_core import dynamics - -from ..constants import Constants as c +from adam_core.constants import Constants as c __all__ = ["calcMMatrix", "calcStateTransitionMatrix"] diff --git a/thor/orbits/tests/test_attribution.py b/thor/orbits/tests/test_attribution.py new file mode 100644 index 00000000..1b1276cb --- /dev/null +++ b/thor/orbits/tests/test_attribution.py @@ -0,0 +1,36 @@ +import pytest +from adam_core.observations import PointSourceDetections +from adam_core.time import Timestamp + +from ...observations.observations import Observations +from ..attribution import Attributions + + +def test_Attributions_drop_coincident_attributions(): + observations = Observations.from_kwargs( + detections=PointSourceDetections.from_kwargs( + id=["01", "02", "03", "04"], + exposure_id=["e01", "e01", "e02", "e02"], + time=Timestamp.from_mjd([59001.1, 59001.1, 59002.1, 59002.1], scale="utc"), + ra=[1, 2, 3, 4], + dec=[5, 6, 7, 8], + mag=[10, 11, 12, 13], + ), + state_id=[0, 0, 1, 1], + observatory_code=["500", "500", "500", "500"], + ) + + attributions = Attributions.from_kwargs( + orbit_id=["o01", "o01", "o02", "o03"], + obs_id=["01", "02", "03", "03"], + distance=[0.5 / 3600, 1 / 3600, 2 / 3600, 1 / 3600], + ) + + filtered = attributions.drop_coincident_attributions(observations) + # Orbit 1 gets linked to two observations at the same time + # We should expect to only keep the one with the smallest distance + # Orbit 2 and 3 get linked to the same observation but we should keep both + assert len(filtered) == 3 + assert filtered.orbit_id.to_pylist() == ["o01", "o02", "o03"] + assert filtered.obs_id.to_pylist() == ["01", "03", "03"] + assert filtered.distance.to_pylist() == [0.5 / 3600, 2 / 3600, 1 / 3600] diff --git a/thor/orbits/utils.py b/thor/orbits/utils.py deleted file mode 100644 index 44b1b75b..00000000 --- a/thor/orbits/utils.py +++ /dev/null @@ -1,24 +0,0 @@ -import pandas as pd -from adam_core.orbits import Ephemeris - - -def _ephemeris_to_dataframe(ephemeris: Ephemeris) -> pd.DataFrame: - ephemeris = ephemeris.sort_by( - [ - "orbit_id", - "coordinates.time.days", - 
"coordinates.time.nanos", - "coordinates.origin.code", - ] - ) - ephemeris_df = ephemeris.to_dataframe() - ephemeris_df.insert(2, "mjd_utc", ephemeris.coordinates.time.mjd()) - ephemeris_df.rename( - columns={ - "coordinates.lon": "RA_deg", - "coordinates.lat": "Dec_deg", - "coordinates.origin.code": "observatory_code", - }, - inplace=True, - ) - return ephemeris_df diff --git a/thor/range_and_transform.py b/thor/range_and_transform.py new file mode 100644 index 00000000..4feb63d4 --- /dev/null +++ b/thor/range_and_transform.py @@ -0,0 +1,213 @@ +import logging +import time +from typing import Optional, Type, Union + +import pyarrow.compute as pc +import quivr as qv +import ray +from adam_core.coordinates import ( + CartesianCoordinates, + OriginCodes, + transform_coordinates, +) +from adam_core.propagator import PYOORB, Propagator + +from .observations.observations import Observations +from .orbit import TestOrbit, TestOrbitEphemeris +from .projections import GnomonicCoordinates + +__all__ = [ + "TransformedDetections", + "range_and_transform", +] + + +logger = logging.getLogger(__name__) + + +class TransformedDetections(qv.Table): + id = qv.StringColumn() + coordinates = GnomonicCoordinates.as_column() + state_id = qv.Int64Column() + + +def range_and_transform_worker( + ranged_detections: CartesianCoordinates, + observations: Observations, + ephemeris: TestOrbitEphemeris, + state_id: int, +) -> TransformedDetections: + """ + Given ranged detections and their original observations, transform these to the gnomonic tangent + plane centered on the motion of the test orbit for a single state. + + Parameters + ---------- + ranged_detections + Cartesian detections ranged so that their heliocentric distance is the same as the test orbit + for each state + observations + The observations from which the ranged detections originate. These should be sorted one-to-one + with the ranged detections + ephemeris + Ephemeris from which to extract the test orbit's aberrated state. + state_id + The ID for this particular state. + + Returns + ------- + transformed_detections + Detections transformed to a gnomonic tangent plane centered on the motion of the + test orbit. + """ + # Select the detections and ephemeris for this state id + mask = pc.equal(state_id, observations.state_id) + ranged_detections_state = ranged_detections.apply_mask(mask) + ephemeris_state = ephemeris.select("id", state_id) + observations_state = observations.select("state_id", state_id) + + # Transform the detections into the co-rotating frame + return TransformedDetections.from_kwargs( + id=observations_state.detections.id, + coordinates=GnomonicCoordinates.from_cartesian( + ranged_detections_state, + center_cartesian=ephemeris_state.ephemeris.aberrated_coordinates, + ), + state_id=observations_state.state_id, + ) + + +range_and_transform_remote = ray.remote(range_and_transform_worker) + + +def range_and_transform( + test_orbit: TestOrbit, + observations: Union[Observations, ray.ObjectRef], + propagator: Type[Propagator] = PYOORB, + propagator_kwargs: dict = {}, + max_processes: Optional[int] = 1, +) -> TransformedDetections: + """ + Range observations for a single test orbit and transform them into a + gnomonic projection centered on the motion of the test orbit (co-rotating + frame). + + Parameters + ---------- + test_orbit : `~thor.orbit.TestOrbit` + Test orbit to use to gather and transform observations. + observations : `~thor.observations.observations.Observations` + Observations from which range and transform the detections. 
+ propagator : `~adam_core.propagator.propagator.Propagator` + Propagator to use to propagate the test orbit and generate + ephemerides. + max_processes : int, optional + Maximum number of processes to use for parallelization. If + an existing ray cluster is already running, this parameter + will be ignored if larger than 1 or not None. + + Returns + ------- + transformed_detections : `~thor.main.TransformedDetections` + The transformed detections as gnomonic coordinates + of the observations in the co-rotating frame. + """ + time_start = time.perf_counter() + logger.info("Running range and transform...") + logger.info(f"Assuming r = {test_orbit.orbit.coordinates.r[0]} au") + logger.info(f"Assuming v = {test_orbit.orbit.coordinates.v[0]} au/d") + + if isinstance(observations, ray.ObjectRef): + observations = ray.get(observations) + + prop = propagator(**propagator_kwargs) + + if len(observations) > 0: + # Compute the ephemeris of the test orbit (this will be cached) + ephemeris = test_orbit.generate_ephemeris_from_observations( + observations, + propagator=prop, + max_processes=max_processes, + ) + + # Assume that the heliocentric distance of all point sources in + # the observations are the same as that of the test orbit + ranged_detections_spherical = test_orbit.range_observations( + observations, + propagator=prop, + max_processes=max_processes, + ) + + # Transform from spherical topocentric to cartesian heliocentric coordinates + ranged_detections_cartesian = transform_coordinates( + ranged_detections_spherical.coordinates, + representation_out=CartesianCoordinates, + frame_out="ecliptic", + origin_out=OriginCodes.SUN, + ) + + transformed_detection_list = [] + if max_processes is None or max_processes > 1: + + if not ray.is_initialized(): + logger.debug( + f"Ray is not initialized. Initializing with {max_processes}..." + ) + ray.init(num_cpus=max_processes) + + if isinstance(observations, ray.ObjectRef): + observations_ref = observations + observations = ray.get(observations_ref) + else: + observations_ref = ray.put(observations) + + if isinstance(ephemeris, ray.ObjectRef): + ephemeris_ref = ephemeris + else: + ephemeris_ref = ray.put(ephemeris) + + ranged_detections_cartesian_ref = ray.put(ranged_detections_cartesian) + + # Get state IDs + state_ids = observations.state_id.unique().sort() + futures = [] + for state_id in state_ids: + futures.append( + range_and_transform_remote.remote( + ranged_detections_cartesian_ref, + observations_ref, + ephemeris_ref, + state_id, + ) + ) + + while futures: + finished, futures = ray.wait(futures, num_returns=1) + transformed_detection_list.append(ray.get(finished[0])) + + else: + # Get state IDs + state_ids = observations.state_id.unique().sort() + for state_id in state_ids: + mask = pc.equal(state_id, observations.state_id) + transformed_detection_list.append( + range_and_transform_worker( + ranged_detections_cartesian.apply_mask(mask), + observations.select("state_id", state_id), + ephemeris.select("id", state_id), + state_id, + ) + ) + + transformed_detections = qv.concatenate(transformed_detection_list) + transformed_detections = transformed_detections.sort_by(by=["state_id"]) + + else: + transformed_detections = TransformedDetections.empty() + + time_end = time.perf_counter() + logger.info(f"Transformed {len(transformed_detections)} observations.") + logger.info( + f"Range and transform completed in {time_end - time_start:.3f} seconds." 
+ ) + return transformed_detections diff --git a/thor/tests/test_main.py b/thor/tests/test_main.py index f0d08125..2feea461 100644 --- a/thor/tests/test_main.py +++ b/thor/tests/test_main.py @@ -3,10 +3,11 @@ from adam_core.utils.helpers import make_observations, make_real_orbits from ..config import Config -from ..main_2 import link_test_orbit, range_and_transform +from ..main import link_test_orbit from ..observations import Observations from ..observations.filters import TestOrbitRadiusObservationFilter from ..orbit import TestOrbit as THORbit +from ..range_and_transform import range_and_transform OBJECT_IDS = [ "594913 'Aylo'chaxnim (2020 AV2)", @@ -243,7 +244,7 @@ def test_link_test_orbit( assert len(recovered_orbit_members) == len(obs_ids_expected) # Ensure we get all the object IDs back that we expect - obs_ids_actual = recovered_orbit_members["obs_id"].values + obs_ids_actual = recovered_orbit_members.obs_id assert pc.all(pc.equal(obs_ids_actual, obs_ids_expected)) @@ -270,5 +271,5 @@ def test_benchmark_link_test_orbit( assert len(recovered_orbit_members) == len(obs_ids_expected) # Ensure we get all the object IDs back that we expect - obs_ids_actual = recovered_orbit_members["obs_id"].values + obs_ids_actual = recovered_orbit_members.obs_id assert pc.all(pc.equal(obs_ids_actual, obs_ids_expected)) diff --git a/thor/utils/__init__.py b/thor/utils/__init__.py index e5f0e3a9..747ff773 100644 --- a/thor/utils/__init__.py +++ b/thor/utils/__init__.py @@ -1,7 +1,3 @@ from .ades import * -from .astropy import * -from .linkages import * from .logging import * -from .multiprocessing import * -from .observations import * -from .patches import * +from .quivr import * diff --git a/thor/utils/astropy.py b/thor/utils/astropy.py deleted file mode 100644 index 3698083c..00000000 --- a/thor/utils/astropy.py +++ /dev/null @@ -1,33 +0,0 @@ -from astropy.time import Time - -__all__ = ["_checkTime"] - - -def _checkTime(time, arg_name): - """ - Check that 'time' is an astropy time object, if not, raise an error. - - Parameters - ---------- - time : `~astropy.time.core.Time` - - arg_name : str - Name of argument in function. - - Returns - ------- - None - - Raises - ------ - ValueError : If time is not an astropy time object. 
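With the astropy Time guard above removed, times travel as adam_core Timestamp objects and column-level comparisons in the updated tests go through pyarrow compute. A short sketch of both, restricted to the calls already used elsewhere in this diff (Timestamp.from_mjd, .mjd(), .to_numpy(zero_copy_only=False), pc.equal / pc.all):

    import pyarrow.compute as pc
    from adam_core.time import Timestamp

    # Build a UTC timestamp column from MJDs and pull it back out as floats
    times = Timestamp.from_mjd([59001.1, 59002.1], scale="utc")
    mjds = times.mjd().to_numpy(zero_copy_only=False)

    # Column-level equality check, as in the updated test assertions
    assert pc.all(pc.equal(times.mjd(), times.mjd())).as_py()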
- """ - err = ( - "Time ({}) has to be an `~astropy.time.core.Time` object.\n" - "Convert using:\n\n" - "from astropy.time import Time\n" - "times = Time(t_array, scale='...', format='...')" - ) - if type(time) != Time: - raise TypeError(err.format(arg_name)) - return diff --git a/thor/utils/linkages.py b/thor/utils/linkages.py deleted file mode 100644 index 029c810a..00000000 --- a/thor/utils/linkages.py +++ /dev/null @@ -1,387 +0,0 @@ -import logging -import time - -import numpy as np -import pandas as pd - -__all__ = [ - "generateCombinations", - "sortLinkages", - "identifySubsetLinkages", - "removeDuplicateLinkages", - "removeDuplicateObservations", - "calcDeltas", -] - -logger = logging.getLogger(__name__) - - -def generateCombinations(x, idx=None, ct=None, reps=None): - # Magic from the wizard himself: Mario Juric - # recursively generate all combinations of idx, assuming - # ct is the list of repeat counts of idx - if x is not None: - # initialization; find indices of where the repetitions are - _, idx, ct = np.unique(x, return_counts=True, return_index=True) - reps = np.nonzero(ct > 1)[0] - if len(reps) == 0: - yield idx - return - i = reps[0] - idx = idx.copy() - for _ in range(ct[i]): - yield from generateCombinations(None, idx, ct, reps[1:]) - idx[i] += 1 - - -def sortLinkages(linkages, linkage_members, observations, linkage_id_col="orbit_id"): - """ - Check that linkages and linkage_members have their linkage IDs in the same order. If not, - sort both by linkage ID. Second, check that linkage_members is additionally sorted by - mjd_utc. If linkage_members does not contain the mjd_utc column, then observations will be merged - to retrieve the observation time. - - Parameters - ---------- - linkages : `~pandas.DataFrame` - DataFrame containing at least a linkage ID column (linkage_id_col). Each unique linkage should - be only present once. - linkage_members : `~pandas.DataFrame` - DataFrame containing at least a linkage ID column (linkage_id_col) and an observation ID column ('obs_id'). The observation ID - column is used to merge on observations so that the observation time can be retrieved. - observations : `~pandas.DataFrame` - DataFrame containing observations with at least an observation ID column ('obs_id') and a observation time - column ('mjd_utc'). - linkage_id_col : str - Name of the linkage ID column. - - Returns - ------- - linkages : `~pandas.DataFrame` - Linkages sorted by linkage IDs. - linkage_members : `~pandas.DataFrame` - Linkages sorted by linkage IDs and observation times. - """ - time_start = time.time() - logger.debug("Verifying linkages...") - - linkages_verified = linkages.copy() - linkage_members_verified = linkage_members.copy() - - reset_index = False - id_sorted = np.all( - linkages_verified[linkage_id_col].values - == linkage_members_verified[linkage_id_col].unique() - ) - if not id_sorted: - logger.debug( - f"Linkages and linkage_members dataframes are not equally sorted by {linkage_id_col}. Sorting..." - ) - # Sort by linkage_id - sort_start = time.time() - linkages_verified.sort_values(by=[linkage_id_col], inplace=True) - linkage_members_verified.sort_values(by=[linkage_id_col], inplace=True) - sort_end = time.time() - duration = sort_end - sort_start - logger.debug(f"Sorting completed in {duration:.3f}s.") - reset_index = True - - time_present = True - if "mjd_utc" not in linkage_members_verified.columns: - logger.debug( - "Observation time column ('mjd_utc') is not in linkage_members, merging with observations..." 
- ) - - # Merge with observations to get the observation time for each observation in linkage_members - merge_start = time.time() - linkage_members_verified = linkage_members_verified.merge( - observations[["obs_id", "mjd_utc"]], on="obs_id", how="left" - ) - merge_end = time.time() - duration = merge_end - merge_start - logger.debug(f"Merging completed in {duration:.3f}s.") - time_present = False - - linkage_members_verified_ = linkage_members_verified.sort_values( - by=[linkage_id_col, "mjd_utc"] - ) - time_sorted = np.all( - linkage_members_verified_[[linkage_id_col, "obs_id"]].values - == linkage_members_verified[[linkage_id_col, "obs_id"]].values - ) - if not time_sorted: - logger.debug( - f"Linkage_members is not sorted by {linkage_id_col} and mjd_utc. Sorting..." - ) - - # Sort by linkage_id, mjd_utc, and finally obs_id - sort_start = time.time() - linkage_members_verified.sort_values( - by=[linkage_id_col, "mjd_utc", "obs_id"], inplace=True - ) - sort_end = time.time() - duration = sort_end - sort_start - logger.debug(f"Sorting completed in {duration:.3f}s.") - reset_index = True - - if reset_index: - for df in [linkages_verified, linkage_members_verified]: - df.reset_index(inplace=True, drop=True) - - if not time_present: - linkage_members_verified.drop(columns=["mjd_utc"], inplace=True) - - time_end = time.time() - duration = time_end - time_start - logger.debug(f"Linkages verified in {duration:.3f}s.") - return linkages_verified, linkage_members_verified - - -def identifySubsetLinkages(linkage_members, linkage_id_col="orbit_id"): - """ - Identify subset linkages. A subset is defined as a linkage which contains - the a subset of observation IDs of a larger or equally sized linkaged. - - For example, if a linkage B has constituent observations: obs0001, obs0002, obs0003, obs0004. - Then a linkage A with constituent observations: obs0001, obs0002, obs0003; is a subset of - B. If linkage A and B share exactly the same observations they will be identified as subsets of each - other. - - Parameters - ---------- - linkage_members : `~pandas.DataFrame` - DataFrame containing at least a linkage ID column (linkage_id_col) and an observation ID column ('obs_id'). - linkage_id_col : str - Name of the linkage ID column. - - Returns - ------- - subsets : `~pandas.DataFrame` - DataFrame containing a column with the linkage_id and a second column containing the linkages identified - as subsets. A linkage with multiple subsets will appear once for every subset linkage found. 
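The subset test described above is plain set containment on observation IDs, and the implementation below this docstring applies it pairwise over all linkages. Spelled out on toy IDs in the docstring's own style:

    linkage_dict = {
        "A": {"obs0001", "obs0002", "obs0003"},
        "B": {"obs0001", "obs0002", "obs0003", "obs0004"},
        "C": {"obs0002", "obs0004"},
    }

    subset_dict = {}
    for id_a, obs_a in linkage_dict.items():
        for id_b, obs_b in linkage_dict.items():
            # Linkage B is a subset of linkage A if all of its observations are in A
            if id_a != id_b and obs_b.issubset(obs_a):
                subset_dict.setdefault(id_a, []).append(id_b)

    print(subset_dict)  # {'B': ['A', 'C']}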
- """ - # Create a dictionary keyed on linkage ID with a set of each linkage's observation - # ID as values - time_start = time.time() - linkage_dict = {} - for linkage_id in linkage_members[linkage_id_col].unique(): - obs_ids = linkage_members[linkage_members[linkage_id_col] == linkage_id][ - "obs_id" - ].values - linkage_dict[linkage_id] = set(obs_ids) - time_end = time.time() - duration = time_end - time_start - logger.debug(f"Linkage dictionary created in {duration:.3f}s.") - - time_start = time.time() - subset_dict = {} - for linkage_id_a in linkage_dict.keys(): - # Grab linkage A's observations - obs_ids_a = linkage_dict[linkage_id_a] - - for linkage_id_b in linkage_dict.keys(): - # If linkage A is not linkage B then - # check if linkage B is a subset of linkage A - if linkage_id_b != linkage_id_a: - - # Grab linkage B's observations - obs_ids_b = linkage_dict[linkage_id_b] - if obs_ids_b.issubset(obs_ids_a): - - # Linkage B is a subset of Linkage A, so lets - # add this result to the subset dictionary - if linkage_id_a not in subset_dict.keys(): - subset_dict[linkage_id_a] = [linkage_id_b] - else: - subset_dict[linkage_id_a].append(linkage_id_b) - time_end = time.time() - duration = time_end - time_start - logger.debug(f"Linkage dictionary scanned for subsets in {duration:.3f}s.") - - time_start = time.time() - linkage_ids = [] - subset_linkages = [] - for linkage_id, subset_ids in subset_dict.items(): - subset_linkages += subset_ids - linkage_ids += [linkage_id for i in range(len(subset_ids))] - subsets = pd.DataFrame({"linkage_id": linkage_ids, "subset_ids": subset_linkages}) - time_end = time.time() - duration = time_end - time_start - logger.debug(f"Subset dataframe created in {duration:.3f}s.") - - return subsets - - -def removeDuplicateLinkages(linkages, linkage_members, linkage_id_col="orbit_id"): - """ - Removes linkages that have identical observations as another linkage. Linkage quality is not taken - into account. - - Parameters - ---------- - linkages : `~pandas.DataFrame` - DataFrame containing at least the linkage ID. - linkage_members : `~pandas.DataFrame` - Dataframe containing the linkage ID and the observation ID for each of the linkage's - constituent observations. Each observation ID should be in a single row. - linkage_id_col : str, optional - Linkage ID column name (must be the same in both DataFrames). - - Returns - ------- - linkages : `~pandas.DataFrame` - DataFrame with duplicate linkages removed. - linkage_members : `~pandas.DataFrame` - DataFrame with duplicate linkages removed. - """ - linkages_ = linkages.copy() - linkage_members_ = linkage_members.copy() - - # Expand observation IDs into columns, then remove duplicates using pandas functionality - expanded = ( - linkage_members_[[linkage_id_col, "obs_id"]] - .groupby(by=[linkage_id_col])["obs_id"] - .apply(list) - .to_frame(name="obs_ids") - ) - expanded = expanded["obs_ids"].apply(pd.Series) - linkage_ids = expanded.drop_duplicates().index.values - - linkages_ = linkages_[linkages_[linkage_id_col].isin(linkage_ids)] - linkage_members_ = linkage_members_[ - linkage_members_[linkage_id_col].isin(linkage_ids) - ] - - for df in [linkages_, linkage_members_]: - df.reset_index(inplace=True, drop=True) - - return linkages_, linkage_members_ - - -def removeDuplicateObservations( - linkages, - linkage_members, - min_obs=5, - linkage_id_col="orbit_id", - filter_cols=["num_obs", "arc_length"], - ascending=[False, False], -): - """ - Removes duplicate observations using the filter columns. 
The filter columns are used to sort the linkages - as desired by the user. The first instance of the observation is kept and all other instances are removed. - If any linkage's number of observations drops below min_obs, that linkage is removed. - - Parameters - ---------- - linkages : `~pandas.DataFrame` - DataFrame containing at least the linkage ID. - linkage_members : `~pandas.DataFrame` - Dataframe containing the linkage ID and the observation ID for each of the linkage's - constituent observations. Each observation ID should be in a single row. - min_obs : int, optional - Minimum number of observations for a linkage to be viable. - linkage_id_col : str, optional - Linkage ID column name (must be the same in both DataFrames). - filter_cols : list, optional - List of column names to use to sort the linkages. - ascending : list, optional - Sort the filter_cols in ascending or descending order. - - Returns - ------- - linkages : `~pandas.DataFrame` - DataFrame with duplicate observations removed. - linkage_members : `~pandas.DataFrame` - DataFrame with duplicate observations removed. - """ - linkages_ = linkages.copy() - linkage_members_ = linkage_members.copy() - - # Sort linkages by the desired columns - linkages_.sort_values( - by=filter_cols, ascending=ascending, inplace=True, ignore_index=True - ) - - # Set both dataframe's indices to the linkage ID for - # faster querying - linkages_.set_index(linkage_id_col, inplace=True) - linkage_members_.set_index(linkage_id_col, inplace=True) - - # Sort linkage members the same way as linkages - linkage_members_ = linkage_members_.loc[linkages_.index.values] - linkage_members_.reset_index(inplace=True) - - # Drop all but the first duplicate observation - linkage_members_ = linkage_members_.drop_duplicates(subset=["obs_id"], keep="first") - - # Make sure that the remaining linkages have enough observations (>= min_obs) - linkage_occurences = linkage_members_[linkage_id_col].value_counts() - linkages_to_keep = linkage_occurences.index.values[ - linkage_occurences.values >= min_obs - ] - linkages_ = linkages_[linkages_.index.isin(linkages_to_keep)] - linkage_members_ = linkage_members_[ - linkage_members_[linkage_id_col].isin(linkages_to_keep) - ] - - # Reset indices - linkages_.reset_index(inplace=True) - linkage_members_.reset_index(inplace=True, drop=True) - return linkages_, linkage_members_ - - -def calcDeltas( - linkage_members, - observations=None, - groupby_cols=["orbit_id", "night_id"], - delta_cols=["mjd_utc", "RA_deg", "Dec_deg", "mag"], -): - """ - Calculate deltas for the desired columns. For example, if groupby columns are given to be orbit_id and night id, then - the linkages are grouped first by orbit_id then night_id, and then the difference in quantities are calculated for - each column in delta_cols. This can be used to calculate the nightly time difference in observations per linkage, or the - amount of motion a linkage has between observations, etc... - - Parameters - ---------- - linkage_members : `~pandas.DataFrame` - DataFrame containing at least a linkage ID column (linkage_id_col) and an observation ID column ('obs_id'). The observation ID - column is used to merge on observations so that the columns from the observations dataframe can be retrieved if necessary. - observations : `~pandas.DataFrame` - DataFrame containing observations with at least an observation ID column ('obs_id'). - groupby_cols : list - Columns by which to group the linkages and calculate deltas. 
- delta_cols : list - Columns for which to calculate deltas. - - Returns - ------- - linkage_members : `~pandas.DataFrame` - Copy of the linkage_members dataframe with the delta columns added. - """ - linkage_members_ = linkage_members.copy() - - # Check to see if each column on which a delta should be - # calculated is in linkage_members, if not look for it - # in observations - cols = [] - for col in delta_cols + groupby_cols: - if col not in linkage_members_.columns: - if col not in observations.columns or observations is None: - err = f"{col} could not be found in either linkage_members or observations." - raise ValueError(err) - - cols.append(col) - - if len(cols) > 0: - linkage_members_ = linkage_members_.merge( - observations[["obs_id"] + cols], on="obs_id", how="left" - ) - - nightly = linkage_members_.groupby(by=groupby_cols) - - deltas = nightly[delta_cols].diff() - deltas.rename(columns={c: f"d{c}" for c in delta_cols}, inplace=True) - linkage_members_ = linkage_members_.join(deltas) - - return linkage_members_ diff --git a/thor/utils/logging.py b/thor/utils/logging.py index 9eb99b1f..c78a11af 100644 --- a/thor/utils/logging.py +++ b/thor/utils/logging.py @@ -1,9 +1,8 @@ import logging import os import sys -import time -__all__ = ["setupLogger", "Timer"] +__all__ = ["setupLogger"] logger = logging.getLogger(__name__) @@ -57,72 +56,3 @@ def setupLogger(name, out_dir=None): logger.addHandler(file_handler) return logger - - -class Timer: - def __init__( - self, - file_name=None, - file_dir="/tmp/thor/", - prepend_data=[], - sep=",", - open_kwargs={ - "mode": "a", - "buffering": -1, - "encoding": "utf-8", - }, - ): - """ - Timing context manager that stores timing results and given user - data to a file if desired. - - - Parameters - ---------- - file_name : {str, None} - Name of file including extension but excluding file directory. - file_dir : str - Directory where to save file. Defaults to /tmp/thor/. - prepend_data : list - Additional data that should be prepended to outputs. - sep : str - If prepend_data is not an empty list, this separator will be used to concatenate - data and the time elapsed into a single line. - open_kwargs : dict - Parameters with which to open the file. 
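
# A minimal sketch of the groupby/diff pattern used by the calcDeltas helper
# removed from thor/utils/linkages.py above, on a toy frame (column names mirror
# the defaults; the values are illustrative): deltas are computed within each
# orbit_id, and the first row of each group has no delta.
import pandas as pd

members = pd.DataFrame(
    {
        "orbit_id": ["o1", "o1", "o1", "o2", "o2"],
        "mjd_utc": [59000.0, 59000.1, 59000.3, 59001.0, 59001.5],
    }
)
members["dmjd_utc"] = members.groupby("orbit_id")["mjd_utc"].diff()
print(members)  # dmjd_utc: NaN, ~0.1, ~0.2, NaN, ~0.5
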
- """ - self.time_start = 0 - self.time_end = 0 - self.file_name = file_name - self.file_dir = file_dir - self.file_path = None - self.file = None - self.prepend_data = prepend_data - self.sep = sep - - if isinstance(file_name, str): - if not os.path.isdir(file_dir): - os.makedirs(file_dir, exist_ok=False) - self.file_path = os.path.join(file_dir, file_name) - self.file = open(self.file_path, **open_kwargs) - return - - def __enter__(self): - self.time_start = time.time() - return - - def __exit__(self, type, value, traceback): - self.time_end = time.time() - duration = self.time_end - self.time_start - data = self.prepend_data + [duration] - string = self.sep.join([str(d) for d in data]) - - if self.file is not None: - self.file.writelines([string + "\n"]) - self.file.close() - else: - if len(self.prepend_data) == 0: - string = f"Time elapsed: {duration:.8f}s" - logger.info(string) - - return diff --git a/thor/utils/multiprocessing.py b/thor/utils/multiprocessing.py deleted file mode 100644 index 8058b12e..00000000 --- a/thor/utils/multiprocessing.py +++ /dev/null @@ -1,153 +0,0 @@ -import logging -import multiprocessing as mp -import signal - -import numpy as np -import pandas as pd - -__all__ = ["Timeout", "yieldChunks", "calcChunkSize", "_initWorker", "_checkParallel"] - -logger = logging.getLogger(__name__) - - -class Timeout: - ### Taken from https://stackoverflow.com/a/22348885 - def __init__(self, seconds=30, error_message="Timeout"): - self.seconds = seconds - self.error_message = error_message - - def handle_timeout(self, signum, frame): - raise TimeoutError(self.error_message) - - def __enter__(self): - signal.signal(signal.SIGALRM, self.handle_timeout) - signal.alarm(self.seconds) - - def __exit__(self, type, value, traceback): - signal.alarm(0) - - -def yieldChunks(indexable, chunk_size): - """ - Generator that yields chunks of size chunk_size. - - Parameters - ---------- - indexable : list, `~numpy.ndarray`, `~pandas.DataFrame`, `~pandas.Series` (N) - Indexable object that needs to be divided into - chunks. - chunk_size : int - Size of each chunk. - - Yields - ------ - chunk : indexable (<=chunk_size) - Chunks of indexable - """ - if isinstance(indexable, list) or isinstance(indexable, np.ndarray): - for c in range(0, len(indexable), chunk_size): - yield indexable[c : c + chunk_size] - elif isinstance(indexable, pd.DataFrame) or isinstance(indexable, pd.Series): - for c in range(0, len(indexable), chunk_size): - yield indexable.iloc[c : c + chunk_size] - else: - err = "Indexable should be one of {list, `~numpy.ndarray`, `~pandas.DataFrame`, `~pandas.Series`}" - raise ValueError(err) - - -def calcChunkSize(n, num_workers, max_chunk_size, min_chunk_size=1): - """ - Calculate the optimal chunk size such that each worker gets at - least min_chunk_size chunks but does not get more than max_chunk_size - chunks. The goal is for no worker to be idle in terms of the number of items - it recieves - - Parameters - ---------- - n : int - Number of items. - num_workers : int - Number of workers to which items will be distributed. - max_chunk_size : int - Maximum chunk size to be given to each worker. - min_chunk_size : int, optional - Minimum chunk size to be given to each worker. - - Yields - ------ - chunk_size : int - Chunk_size between min_chunk_size and max_chunk_size. 
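
# A minimal sketch of the fixed-size chunking performed by the removed yieldChunks
# above; the helper name and inputs here are illustrative, and only the plain
# list/array case is shown (the removed function also handled pandas objects).
import numpy as np


def yield_chunks_simple(indexable, chunk_size):
    # Yield successive slices of at most chunk_size elements.
    for start in range(0, len(indexable), chunk_size):
        yield indexable[start : start + chunk_size]


chunks = list(yield_chunks_simple(np.arange(15), 10))
print([len(c) for c in chunks])  # [10, 5]
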
- """ - # Calculate the number of n that should be sent to each worker - c = np.maximum(np.floor(n / num_workers), min_chunk_size).astype(int) - - # Make sure this number does not exceed the maximum chunk size - chunk_size = np.minimum(c, max_chunk_size) - return chunk_size - - -def _initWorker(): - """ - Tell multiprocessing worker to ignore signals, will only - listen to parent process. - """ - signal.signal(signal.SIGINT, signal.SIG_IGN) - return - - -def _checkParallel(num_jobs, parallel_backend): - """ - Helper function to determine how many workers (jobs) should be submitted. - If the parallelization backend is Python's multiprocessing ('mp') and num_jobs - is either "auto" or None, then mp.cpu_count() will be used to determine the number - of jobs. If num_jobs is not "auto" or None, then that number will be used instead. - If the parallelization backend is ray, then the number of resources on the cluster will - determine the number of workers, and num_jobs is ignored. - - Parameters - ---------- - num_jobs : {None, "auto", int} - Number of jobs to launch. - parallel_backend : str - Name of backend. Should be one of {'ray', 'mp', 'cf'}. - - Returns - ------- - enable_parallel : bool - True if parallelization should be used (true for cases where num_jobs > 1). - num_workers : int - The number of workers to use. - - Raises - ------ - ValueError : If parallel_backend is not one of {'ray', 'mp', 'cf'}. - """ - if isinstance(num_jobs, str) or (num_jobs is None) or (num_jobs > 1): - - # Check that pareallel_backend is one of the support types - backends = ["ray", "mp", "cf"] - if parallel_backend not in backends: - err = "parallel_backend should be one of {'ray', 'mp', 'cf'}" - raise ValueError(err) - - enable_parallel = True - if parallel_backend == "ray": - import ray - - if not num_jobs == "auto" or num_jobs is not None: - logger.warning( - "This process is running with the ray parallelization backend: num_jobs parameter will be ignored." - ) - - num_workers = int(ray.cluster_resources()["CPU"]) - - else: - if num_jobs == "auto" or num_jobs is None: - num_workers = mp.cpu_count() - else: - num_workers = num_jobs - else: - num_workers = 1 - enable_parallel = False - - return enable_parallel, num_workers diff --git a/thor/utils/observations.py b/thor/utils/observations.py deleted file mode 100644 index 1c8ab1bf..00000000 --- a/thor/utils/observations.py +++ /dev/null @@ -1,74 +0,0 @@ -import numpy as np -from astropy.time import Time - -from .astropy import _checkTime - -__all__ = ["calcLinkingWindow", "calcNight"] - - -def calcLinkingWindow(times: Time, length: float) -> np.ndarray: - """ - Calculate the linking window for each given time. Linking windows are counted - from the earliest time. For example, if given the following times and a defined - window size of 5 days then the times will be assigned as follows: - Time: Linking Window - 2021-08-08T12:00:00.000 1 - 2021-08-09T12:00:00.000 1 - 2021-08-10T12:00:00.000 1 - 2021-08-11T12:00:00.000 1 - 2021-08-12T12:00:00.000 1 - 2021-08-13T12:00:00.000 2 - 2021-08-14T12:00:00.000 2 - 2021-08-15T12:00:00.000 2 - 2021-08-16T12:00:00.000 2 - 2021-08-17T12:00:00.000 2 - - Parameters - ---------- - times : `~astropy.time.core.Time` - Observation times that need to be assigned to a linking window. - length : float, int - Length of the desired linking window in units of decimal days. - - Returns - ------- - window : `~numpy.ndarray` (N) - The linking window number to which each time was assigned. 
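
# A minimal sketch of the arithmetic behind calcLinkingWindow above, using plain
# MJD floats instead of an astropy Time object (the times below are illustrative):
# window = floor((mjd - mjd_min) / length) + 1, counted from the earliest time.
import numpy as np

mjd = np.array([59434.5, 59435.5, 59438.5, 59439.5, 59443.5])
length = 5.0  # linking window length in days
window = (np.floor((mjd - mjd.min()) / length) + 1).astype(int)
print(window)  # [1 1 1 2 2]
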
- """ - _checkTime(times, "times") - dt = times.mjd - times.mjd.min() - window = np.floor(dt / length) + 1 - return window.astype(int) - - -def calcNight(times: Time, local_midnight: float) -> np.ndarray: - """ - Given a set of observation times, calculates the night - on which those observations were made. - - Parameters - ---------- - times : `~astropy.core.Time` (N) - Observation times. - local_midnight : float - The change in time from UTC at which local midnight occurs for the observer - or observatory that made the observations. This should be a float value in units - of decimal days. For those locations East of the Prime Meridian these - would be positive values (e.g., Brussels would be +1/24, Shanghai would be +8/24). - For locations West of the Prime Meridian these values should be negative - (e.g., New York would be -4/24, Seattle would be -7/24). - - Returns - ------- - night : `~numpy.ndarray` (N) - Night on which the observation was made expressed as an integer MJD. - """ - _checkTime(times, "times") - mjd_utc = times.utc.mjd - local_mjd = mjd_utc - local_midnight - # Observations that occur after midnight should be assigned the night value - # of the observations that occurred before midnight, so shift all the observations - # 12 hours earlier and use that as the night value - nights = local_mjd - 12 / 24 - nights = nights.astype(int) - return nights diff --git a/thor/utils/patches.py b/thor/utils/patches.py deleted file mode 100644 index fab9444f..00000000 --- a/thor/utils/patches.py +++ /dev/null @@ -1,91 +0,0 @@ -import healpy as hp -import numpy as np - -__all__ = ["assignPatchesSquare", "assignPatchesHEALPix"] - - -def assignPatchesSquare( - ra: np.ndarray, - dec: np.ndarray, - ra_width: float = 15.0, - dec_width: float = 15.0, -) -> np.ndarray: - """ - Assign a patch ID to each coordinate where a patch is a square region - on the sky plane of ra_width in RA and of dec_width in Dec. - - RA must be between 0 and 360 degrees. - Dec must be between -90 and 90 degrees. - - Patches are created first in increasing Declination and second in increasing - Right Ascension. The first patch will border RA of 0 and Dec of -90, while - the last patch will border RA of 360 and Dec of 90. - - Parameters - ---------- - ra : `~numpy.ndarray` (N) - Right Ascension in degrees. - dec : `~numpy.ndarray` (N) - Declination in degrees. - ra_width : float - Width of patch in RA in degrees. - dec_with : float - Width of patch in Dec in degrees. - - Returns - ------- - patch_ids : `~numpy.ndarray` (N) - The patch ID for each coordinate. - """ - ras = np.arange(0, 360 + ra_width, ra_width) - decs = np.arange(-90, 90 + dec_width, dec_width) - - patch_id = 0 - patch_ids = -1 * np.ones(len(ra), dtype=int) - for ra_i, ra_f in zip(ras[:-1], ras[1:]): - for dec_i, dec_f in zip(decs[:-1], decs[1:]): - - mask = np.where( - ((ra >= ra_i) & (ra < ra_f) & (dec >= dec_i) & (dec < dec_f)) - ) - patch_ids[mask] = patch_id - - patch_id += 1 - - return patch_ids - - -def assignPatchesHEALPix( - ra: np.ndarray, - dec: np.ndarray, - nside: int = 1024, - nest: bool = True, - lonlat: bool = True, -) -> np.ndarray: - """ - Assign patches using a HEALPix schema. - For details see Górski et al. (2005). - - Parameters - ---------- - ra : `~numpy.ndarray` (N) - Right Ascension in degrees. - dec : `~numpy.ndarray` (N) - Declination in degrees. - nside : int - HEALPix nside parameter (must be a power of 2). - - Returns - ------- - patch_ids : `~numpy.ndarray` (N) - The patch ID for each coordinate. 
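
# A minimal sketch of the night assignment performed by calcNight above, using
# plain MJD floats instead of an astropy Time object. The offset mimics the
# Seattle value from the docstring (-7/24); the observation times are illustrative.
import numpy as np

local_midnight = -7 / 24
# Two observations taken 5 hours before and 5 hours after local midnight of MJD 59001
mjd_utc = 59001.0 + np.array([-5 / 24, 5 / 24]) + local_midnight
night = (mjd_utc - local_midnight - 12 / 24).astype(int)
print(night)  # [59000 59000] : both belong to the same observing night
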
- - References - ---------- - [1] Górski, K. M., Hivon, E., Banday, A. J., Wandelt, B. D., Hansen, F. K., Reinecke, M., & Bartelmann, M. (2005). - HEALPix: A Framework for High-Resolution Discretization and Fast Analysis of Data Distributed on the Sphere. - The Astrophysical Journal, 622(2), 759. https://doi.org/10.1086/427976 - """ - patch_ids = hp.ang2pix(nside, ra, dec, nest=nest, lonlat=lonlat) - - return patch_ids diff --git a/thor/utils/quivr.py b/thor/utils/quivr.py new file mode 100644 index 00000000..aaa705db --- /dev/null +++ b/thor/utils/quivr.py @@ -0,0 +1,62 @@ +from typing import List, Literal, Optional + +import numpy as np +import pyarrow as pa +import quivr as qv + +__all__ = [ + "drop_duplicates", +] + + +def drop_duplicates( + table: qv.AnyTable, + subset: Optional[List[str]] = None, + keep: Literal["first", "last"] = "first", +) -> qv.AnyTable: + """ + Drop duplicate rows from a `~quivr.Table`. This function is similar to + `~pandas.DataFrame.drop_duplicates` but it supports nested columns (representing + nested tables). + + Parameters + ---------- + table : `~quivr.Table` + Table to drop duplicate rows from. + subset : list of str, optional + Subset of columns to consider when dropping duplicates. If not specified then + all columns are used. + keep : {'first', 'last'}, default 'first' + If there are duplicate rows then keep the first or last row. + + Returns + ------- + table : `~quivr.Table` + Table with duplicate rows removed. + """ + # Flatten the table so nested columns are dot-delimited at the top level + flattened_table = table.flattened_table() + + # If subset is not specified then use all the columns + if subset is None: + subset = [c for c in flattened_table.column_names] + + # Add an index column to the flattened table + flattened_table = flattened_table.add_column( + 0, "index", pa.array(np.arange(len(flattened_table))) + ) + + if keep == "first": + agg_func = keep + elif keep == "last": + agg_func = keep + else: + raise ValueError("keep must be either 'first' or 'last'") + indices = ( + flattened_table.group_by(subset, use_threads=False) + .aggregate([("index", agg_func)]) + .column(f"index_{agg_func}") + ) + + # Take the indices from the flattened table and use them to index into the original table + return table.take(indices) diff --git a/thor/utils/tests/__init__.py b/thor/utils/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/thor/utils/tests/test_astropy.py b/thor/utils/tests/test_astropy.py deleted file mode 100644 index 0727fe8f..00000000 --- a/thor/utils/tests/test_astropy.py +++ /dev/null @@ -1,21 +0,0 @@ -import numpy as np -import pytest -from astropy.time import Time - -from ..astropy import _checkTime - - -def test__checkTime(): - # Create an array of epochs - times = np.linspace(59580, 59590, 100) - - # Test that an error is raised when times are not - # an astropy time object - with pytest.raises(TypeError): - _checkTime(times, "test") - - # Test that _checkTime passes when an astropy time object is - # given as intended - times_astropy = Time(times, format="mjd", scale="utc") - - return diff --git a/thor/utils/tests/test_linkages.py b/thor/utils/tests/test_linkages.py deleted file mode 100644 index e313969a..00000000 --- a/thor/utils/tests/test_linkages.py +++ /dev/null @@ -1,227 +0,0 @@ -import numpy as np -import pandas as pd -import pytest - -from ..linkages import calcDeltas, identifySubsetLinkages, sortLinkages - -### Create test data set -linkage_ids = ["a", "b", "c"] -linkage_lengths = [4, 5, 6] - 
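
# A minimal sketch of the pyarrow pattern that the new drop_duplicates utility
# above relies on: tag each row with its position, group on the key columns, keep
# the "first" index per group, and take those rows. The toy table and column names
# below are illustrative, not THOR data; the real helper operates on a flattened
# quivr table rather than a plain pyarrow table.
import numpy as np
import pyarrow as pa

table = pa.table({"obs_id": ["o1", "o2", "o2", "o3"], "mag": [20.1, 20.5, 20.5, 21.0]})
indexed = table.add_column(0, "index", pa.array(np.arange(len(table))))
keep = (
    indexed.group_by(["obs_id", "mag"], use_threads=False)
    .aggregate([("index", "first")])
    .column("index_first")
)
print(table.take(keep).num_rows)  # 3 : the repeated ("o2", 20.5) row is dropped
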
-linkage_members_ids = [] -for i, lid in enumerate(linkage_ids): - linkage_members_ids += [lid for j in range(linkage_lengths[i])] -obs_ids = [f"o{i:04d}" for i in range(len(linkage_members_ids))] - -times = [] -for i in range(len(linkage_ids)): - times += [np.arange(59000, 59000 + linkage_lengths[i])] -times = np.concatenate(times) - -LINKAGES = pd.DataFrame({"linkage_id": linkage_ids}) -LINKAGE_MEMBERS = pd.DataFrame( - { - "linkage_id": linkage_members_ids, - "obs_id": obs_ids, - "mjd_utc": times, - } -) -OBSERVATIONS = pd.DataFrame( - { - "obs_id": obs_ids, - "mjd_utc": times, - } -) -OBSERVATIONS.sort_values(by=["mjd_utc", "obs_id"], inplace=True, ignore_index=True) - - -def test_sortLinkages_timePresent(): - np.random.seed(42) - - # Scramble the linkages dataframe - len_linkages = len(LINKAGES) - scramble = np.random.choice(len_linkages, len_linkages, replace=False) - linkages_unsorted = LINKAGES.loc[scramble].reset_index(drop=True) - - # Scramble the linkage_members dataframe - len_members = len(LINKAGE_MEMBERS) - scramble = np.random.choice(len_members, len_members, replace=False) - linkage_members_unsorted = LINKAGE_MEMBERS.loc[scramble].reset_index(drop=True) - - # Sort scrambled linkages - linkages_sorted, linkage_members_sorted = sortLinkages( - linkages_unsorted, - linkage_members_unsorted, - OBSERVATIONS, - linkage_id_col="linkage_id", - ) - - # Make sure they returned dataframes match those created - pd.testing.assert_frame_equal(LINKAGES, linkages_sorted) - pd.testing.assert_frame_equal(LINKAGE_MEMBERS, linkage_members_sorted) - - -def test_sortLinkages_timeMissing(): - np.random.seed(42) - - # Scramble the linkages dataframe - len_linkages = len(LINKAGES) - scramble = np.random.choice(len_linkages, len_linkages, replace=False) - linkages_unsorted = LINKAGES.loc[scramble].reset_index(drop=True) - - # Scramble the linkage_members dataframe - len_members = len(LINKAGE_MEMBERS) - scramble = np.random.choice(len_members, len_members, replace=False) - linkage_members_unsorted = LINKAGE_MEMBERS.loc[scramble].reset_index(drop=True) - linkage_members_unsorted.drop(columns=["mjd_utc"], inplace=True) - - # Sort scrambled linkages - linkages_sorted, linkage_members_sorted = sortLinkages( - linkages_unsorted, - linkage_members_unsorted, - OBSERVATIONS, - linkage_id_col="linkage_id", - ) - - # Make sure they returned dataframes match those created - pd.testing.assert_frame_equal(LINKAGES, linkages_sorted) - pd.testing.assert_frame_equal( - LINKAGE_MEMBERS[["linkage_id", "obs_id"]], linkage_members_sorted - ) - - -def test_calcDeltas(): - - # Calculate deltas for the time column and make sure they match the input dataset - linkages_members_ = calcDeltas( - LINKAGE_MEMBERS, - OBSERVATIONS, - groupby_cols=["linkage_id"], - delta_cols=["mjd_utc"], - ) - - assert "dmjd_utc" in linkages_members_.columns - assert ( - linkages_members_[linkages_members_["linkage_id"] == "a"]["dmjd_utc"].sum() == 3 - ) - assert ( - linkages_members_[linkages_members_["linkage_id"] == "b"]["dmjd_utc"].sum() == 4 - ) - assert ( - linkages_members_[linkages_members_["linkage_id"] == "c"]["dmjd_utc"].sum() == 5 - ) - - -def test_calcDeltas_columnInObservations(): - - # Calculate deltas for the time column and make sure they match the input dataset, but - # this time remove the column from linkage_members so that calcDeltas looks for it - # in observations - linkages_members_ = calcDeltas( - LINKAGE_MEMBERS[["linkage_id", "obs_id"]], - OBSERVATIONS, - groupby_cols=["linkage_id"], - delta_cols=["mjd_utc"], - ) - - 
assert "dmjd_utc" in linkages_members_.columns - assert ( - linkages_members_[linkages_members_["linkage_id"] == "a"]["dmjd_utc"].sum() == 3 - ) - assert ( - linkages_members_[linkages_members_["linkage_id"] == "b"]["dmjd_utc"].sum() == 4 - ) - assert ( - linkages_members_[linkages_members_["linkage_id"] == "c"]["dmjd_utc"].sum() == 5 - ) - - -def test_calcDeltas_missingColumn(): - - # Calculate deltas for the time column and make sure they match the input dataset - with pytest.raises(ValueError): - linkages_members_ = calcDeltas( - LINKAGE_MEMBERS[["linkage_id", "obs_id"]], - OBSERVATIONS[["obs_id"]], - groupby_cols=["linkage_id"], - delta_cols=["mjd_utc"], - ) - - -def test_identifySubsetLinkages_0subsets(): - - # No subsets should be found in the default dataset - subsets = identifySubsetLinkages(LINKAGE_MEMBERS, linkage_id_col="linkage_id") - assert len(subsets) == 0 - - return - - -def test_identifySubsetLinkages_3subsets(): - - # Make a copy of the linkage members dataframe - # Rename the observations so that A is a subset of B and C, and so that B is a subset of C. - linkage_members = LINKAGE_MEMBERS.copy() - for linkage_id in LINKAGE_MEMBERS["linkage_id"].unique(): - num_obs = len(linkage_members[linkage_members["linkage_id"].isin([linkage_id])]) - linkage_members.loc[ - linkage_members["linkage_id"].isin([linkage_id]), "obs_id" - ] = [f"o{i:04d}" for i in range(num_obs)] - - subsets = identifySubsetLinkages(linkage_members, linkage_id_col="linkage_id") - - # Make sure A and B have been identified as subsets of C - C_subsets = subsets[subsets["linkage_id"] == "c"]["subset_ids"].values - assert "a" in C_subsets - assert "b" in C_subsets - assert len(C_subsets) == 2 - - # Make sure A has been identifed as a subsets of B - B_subsets = subsets[subsets["linkage_id"] == "b"]["subset_ids"].values - assert "a" in B_subsets - assert len(B_subsets) == 1 - - # Make sure A has no subsets - A_subsets = subsets[subsets["linkage_id"] == "a"]["subset_ids"].values - assert len(A_subsets) == 0 - - return - - -def test_identifySubsetLinkages_3duplicates(): - - # Make a copy of the linkage members dataframe - # Rename the observations so that A is a subset of B and C, and so that B is a subset of C. 
- linkage_members = LINKAGE_MEMBERS.copy() - for linkage_id in LINKAGE_MEMBERS["linkage_id"].unique(): - num_obs = len(linkage_members[linkage_members["linkage_id"].isin([linkage_id])]) - linkage_members.loc[ - linkage_members["linkage_id"].isin([linkage_id]), "obs_id" - ] = [f"o{i:04d}" for i in range(num_obs)] - - # Trim the linkages so that they have exactly the same observations - linkage_members = linkage_members[ - ~linkage_members["obs_id"].isin(["o0004", "o0005"]) - ].copy() - - subsets = identifySubsetLinkages(linkage_members, linkage_id_col="linkage_id") - - # Make sure A and B have been identified as subsets of C - C_subsets = subsets[subsets["linkage_id"] == "c"]["subset_ids"].values - assert "a" in C_subsets - assert "b" in C_subsets - assert len(C_subsets) == 2 - - # Make sure A and C have been identified as subsets of B - B_subsets = subsets[subsets["linkage_id"] == "b"]["subset_ids"].values - assert "a" in B_subsets - assert "c" in B_subsets - assert len(B_subsets) == 2 - - # Make sure B and C have been identified as subsets of C - C_subsets = subsets[subsets["linkage_id"] == "c"]["subset_ids"].values - assert "a" in C_subsets - assert "b" in C_subsets - assert len(C_subsets) == 2 - - return diff --git a/thor/utils/tests/test_multiprocessing.py b/thor/utils/tests/test_multiprocessing.py deleted file mode 100644 index fd7cb6e5..00000000 --- a/thor/utils/tests/test_multiprocessing.py +++ /dev/null @@ -1,372 +0,0 @@ -import numpy as np -import pandas as pd -import pytest - -from ..multiprocessing import calcChunkSize, yieldChunks - - -def test_yieldChunks_list(): - # Create list of data - indexable = [i for i in range(15)] - - # Set chunk_size to 10 - chunk_size = 10 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 9 - chunk = next(generator) - desired = np.arange(0, 10, 1) - np.testing.assert_array_equal(np.array(chunk), desired) - - # Second iteration should yield 10 through 15 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(np.array(chunk), desired) - - # Set chunk_size to 5 - chunk_size = 5 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 4 - chunk = next(generator) - desired = np.arange(0, 5, 1) - np.testing.assert_array_equal(np.array(chunk), desired) - - # Second iteration should yield 5 through 9 - chunk = next(generator) - desired = np.arange(5, 10, 1) - np.testing.assert_array_equal(np.array(chunk), desired) - - # Third iteration should yield 10 through 14 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(np.array(chunk), desired) - - # Set chunk_size to 20 - chunk_size = 20 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 14 - chunk = next(generator) - desired = np.arange(0, 15, 1) - np.testing.assert_array_equal(np.array(chunk), desired) - return - - -def test_yieldChunks_array(): - # Create list of data - indexable = np.arange(0, 15, 1) - - # Set chunk_size to 10 - chunk_size = 10 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 9 - chunk = next(generator) - desired = np.arange(0, 10, 1) - np.testing.assert_array_equal(chunk, desired) - - # Second iteration should yield 10 through 15 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk, desired) - - # Set chunk_size to 5 - chunk_size = 5 - generator = yieldChunks(indexable, chunk_size) - - # First iteration 
should yield 0 through 4 - chunk = next(generator) - desired = np.arange(0, 5, 1) - np.testing.assert_array_equal(chunk, desired) - - # Second iteration should yield 5 through 9 - chunk = next(generator) - desired = np.arange(5, 10, 1) - np.testing.assert_array_equal(chunk, desired) - - # Third iteration should yield 10 through 14 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk, desired) - - # Set chunk_size to 20 - chunk_size = 20 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 14 - chunk = next(generator) - desired = np.arange(0, 15, 1) - np.testing.assert_array_equal(chunk, desired) - return - - -def test_yieldChunks_series(): - # Create series of data - indexable = pd.Series(np.arange(0, 15, 1)) - - # Set chunk_size to 10 - chunk_size = 10 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 9 - chunk = next(generator) - desired = np.arange(0, 10, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Second iteration should yield 10 through 15 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Set chunk_size to 5 - chunk_size = 5 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 4 - chunk = next(generator) - desired = np.arange(0, 5, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Second iteration should yield 5 through 9 - chunk = next(generator) - desired = np.arange(5, 10, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Third iteration should yield 10 through 14 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Set chunk_size to 20 - chunk_size = 20 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 14 - chunk = next(generator) - desired = np.arange(0, 15, 1) - np.testing.assert_array_equal(chunk.values, desired) - return - - -def test_yieldChunks_series_offsetIndex(): - # Create series of data - indexable = pd.Series(np.arange(0, 15, 1)) - # Offset the index and make sure chunking is done independent of - # the values of the index - indexable.index = np.arange(0, 150, 10) - - # Set chunk_size to 10 - chunk_size = 10 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 9 - chunk = next(generator) - desired = np.arange(0, 10, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Second iteration should yield 10 through 15 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Set chunk_size to 5 - chunk_size = 5 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 4 - chunk = next(generator) - desired = np.arange(0, 5, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Second iteration should yield 5 through 9 - chunk = next(generator) - desired = np.arange(5, 10, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Third iteration should yield 10 through 14 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk.values, desired) - - # Set chunk_size to 20 - chunk_size = 20 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 14 - chunk = next(generator) - desired = np.arange(0, 15, 1) - 
np.testing.assert_array_equal(chunk.values, desired) - return - - -def test_yieldChunks_dataframe_offsetIndex(): - # Create dataframe of data - data = {"x": np.arange(0, 15, 1)} - indexable = pd.DataFrame(data) - # Offset the index and make sure chunking is done independent of - # the values of the index - indexable.index = np.arange(0, 150, 10) - - # Set chunk_size to 10 - chunk_size = 10 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 9 - chunk = next(generator) - desired = np.arange(0, 10, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Second iteration should yield 10 through 15 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Set chunk_size to 5 - chunk_size = 5 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 4 - chunk = next(generator) - desired = np.arange(0, 5, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Second iteration should yield 5 through 9 - chunk = next(generator) - desired = np.arange(5, 10, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Third iteration should yield 10 through 14 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Set chunk_size to 20 - chunk_size = 20 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 14 - chunk = next(generator) - desired = np.arange(0, 15, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - return - - -def test_yieldChunks_dataframe(): - # Create dataframe of data - data = {"x": np.arange(0, 15, 1)} - indexable = pd.DataFrame(data) - - # Set chunk_size to 10 - chunk_size = 10 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 9 - chunk = next(generator) - desired = np.arange(0, 10, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Second iteration should yield 10 through 15 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Set chunk_size to 5 - chunk_size = 5 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 4 - chunk = next(generator) - desired = np.arange(0, 5, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Second iteration should yield 5 through 9 - chunk = next(generator) - desired = np.arange(5, 10, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Third iteration should yield 10 through 14 - chunk = next(generator) - desired = np.arange(10, 15, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - - # Set chunk_size to 20 - chunk_size = 20 - generator = yieldChunks(indexable, chunk_size) - - # First iteration should yield 0 through 14 - chunk = next(generator) - desired = np.arange(0, 15, 1) - np.testing.assert_array_equal(chunk["x"].values, desired) - return - - -def test_yieldChunks_errors(): - - # Make sure yieldChunks raises an error for unsupported types - chunk_size = 1 - with pytest.raises(ValueError): - generator = yieldChunks(set(), chunk_size) - next(generator) - - return - - -def test_calcChunkSize(): - - num_workers = 60 - n = 100 - max_chunk_size = 10 - min_chunk_size = 1 - - # Number of workers is less than 2x the number of things to process, so - # chunk size should fall to the minimum value - chunk_size = calcChunkSize( - n, 
num_workers, max_chunk_size, min_chunk_size=min_chunk_size - ) - assert chunk_size == 1 - - min_chunk_size = 5 - # Number of workers is less than 2x the number of things to process, so - # chunk size should fall to the minimum value - chunk_size = calcChunkSize( - n, num_workers, max_chunk_size, min_chunk_size=min_chunk_size - ) - assert chunk_size == 5 - - num_workers = 10 - n = 1000 - max_chunk_size = 10 - min_chunk_size = 1 - - # Number of things to process is 10x the number of workers, the max chunk size is 10 so - # the chunk_size should be 10 - chunk_size = calcChunkSize( - n, num_workers, max_chunk_size, min_chunk_size=min_chunk_size - ) - assert chunk_size == 10 - - num_workers = 10 - n = 1000 - max_chunk_size = 5 - min_chunk_size = 1 - - # Number of things to process is 10x the number of workers, the max chunk size is 5 so - # the chunk_size should be 5 - chunk_size = calcChunkSize( - n, num_workers, max_chunk_size, min_chunk_size=min_chunk_size - ) - assert chunk_size == 5 - - num_workers = 100 - n = 10000 - max_chunk_size = 1000 - min_chunk_size = 1 - - # Number of things to process is 10x the number of workers, the max chunk size is 1000 so - # the chunk_size should be 10000/100 = 100 - chunk_size = calcChunkSize( - n, num_workers, max_chunk_size, min_chunk_size=min_chunk_size - ) - assert chunk_size == 100 - return diff --git a/thor/utils/tests/test_observations.py b/thor/utils/tests/test_observations.py deleted file mode 100644 index b9a3e63e..00000000 --- a/thor/utils/tests/test_observations.py +++ /dev/null @@ -1,108 +0,0 @@ -import numpy as np -import pytest -from astropy.time import Time - -from ..observations import calcLinkingWindow, calcNight - - -def test_calcLinkingWindow(): - - # Times are all in August 2021 - time = Time([59426 + 1 / 24], scale="utc", format="mjd") - times = time + np.arange(0, 30) - - # If the linking window exceeds the range of the observations - # then they all should be in the first linking window - length = 30 - linking_window = calcLinkingWindow(times, length) - np.testing.assert_equal(linking_window, np.ones(length, dtype=int)) - - # If the linking window is 15 days then we expect 15 observations - # in linking window 1 and 15 in window 2 - length = 15 - linking_window = calcLinkingWindow(times, length) - np.testing.assert_equal(linking_window[0:15], np.ones(length, dtype=int)) - np.testing.assert_equal(linking_window[15:30], 2 * np.ones(length, dtype=int)) - - # If the linking window is 10 days then we expect 10 observations - # in linking window 1 and 10 in window 2 and 10 in window 3 - length = 10 - linking_window = calcLinkingWindow(times, length) - np.testing.assert_equal(linking_window[0:10], np.ones(length, dtype=int)) - np.testing.assert_equal(linking_window[10:20], 2 * np.ones(length, dtype=int)) - np.testing.assert_equal(linking_window[20:30], 3 * np.ones(length, dtype=int)) - - return - - -def test_calcLinkingWindow_raises(): - - # Times are all in August 2021 - time = Time([59426 + 1 / 24], scale="utc", format="mjd") - times = time + np.arange(0, 30) - - # Passing a plain array should raise a TypeError - with pytest.raises(TypeError): - linking_window = calcLinkingWindow(times.mjd, 10) - - return - - -def test_calcNight_london(): - # Lets assume three days of observations were made - # somewhere in London with observations occuring - # +- 5 hours around midnight - dts = np.linspace(-5, 5, 20) - dts /= 24.0 - offset = 0 / 24 - days = [59001, 59002, 59003] - times = np.zeros(len(dts) * len(days)) - for i, day in enumerate(days): - 
times[i * len(dts) : (i + 1) * len(dts)] = day + dts + offset - observation_times = Time(times, scale="utc", format="mjd") - - night = calcNight(observation_times, offset) - assert np.all(night[0:20] == 59000) - assert np.all(night[20:40] == 59001) - assert np.all(night[40:60] == 59002) - return - - -def test_calcNight_seattle(): - # Lets assume three days of observations were made - # somewhere in Seattle with observations occuring - # +- 5 hours around midnight - dts = np.linspace(-5, 5, 20) - dts /= 24.0 - offset = -7 / 24 - days = [59001, 59002, 59003] - times = np.zeros(len(dts) * len(days)) - for i, day in enumerate(days): - times[i * len(dts) : (i + 1) * len(dts)] = day + dts + offset - observation_times = Time(times, scale="utc", format="mjd") - - night = calcNight(observation_times, offset) - assert np.all(night[0:20] == 59000) - assert np.all(night[20:40] == 59001) - assert np.all(night[40:60] == 59002) - return - - -def test_calcNight_shanghai(): - # Lets assume three days of observations were made - # somewhere in Shanghai with observations occuring - # +- 5 hours around midnight - dts = np.linspace(-5, 5, 20) - dts /= 24.0 - offset = 8 / 24 - days = [59001, 59002, 59003] - times = np.zeros(len(dts) * len(days)) - for i, day in enumerate(days): - times[i * len(dts) : (i + 1) * len(dts)] = day + dts + offset - observation_times = Time(times, scale="utc", format="mjd") - - night = calcNight(observation_times, offset) - assert np.all(night[0:20] == 59000) - assert np.all(night[20:40] == 59001) - assert np.all(night[40:60] == 59002) - return diff --git a/thor/utils/tests/test_patches.py b/thor/utils/tests/test_patches.py deleted file mode 100644 index 74553388..00000000 --- a/thor/utils/tests/test_patches.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np - -from ..patches import assignPatchesHEALPix, assignPatchesSquare - - -def test_assignPatchesSquare(): - - ra_width = 15 - dec_width = 15 - num_ra = 360 / ra_width - num_dec = 180 / dec_width - - # Create fake observations at the midpoint of each bin - ra = np.arange(0.0, 360, ra_width) - ra += ra_width / 2 - dec = np.arange(-90, 90.0, dec_width) - dec += dec_width / 2 - - dec, ra = np.meshgrid(dec, ra) - dec = dec.flatten() - ra = ra.flatten() - patch_ids_desired = np.arange(0, len(dec), dtype=int) - - patch_ids = assignPatchesSquare(ra, dec, ra_width=ra_width, dec_width=dec_width) - - # Check that the patch IDs match the expected order - # (increasing Dec first, then increasing RA) - np.testing.assert_equal(patch_ids_desired, patch_ids) - - -def test_assignPatchesHEALPix(): - - # Create observations - ra = np.arange(0.0, 360, 10) - dec = np.arange(-90, 90.0, 10) - - dec, ra = np.meshgrid(dec, ra) - dec = dec.flatten() - ra = ra.flatten() - nside = 32 - - # Not really testing for anything other than - # that the function call works.. - patch_ids = assignPatchesHEALPix(ra, dec, nside) - assert len(patch_ids) == len(ra)
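
# A minimal sketch of the square sky-patch numbering used by the removed
# assignPatchesSquare (tested above): patches advance first in increasing Dec,
# then in increasing RA. This vectorized form assumes 0 <= RA < 360 and
# -90 <= Dec < 90 degrees; the coordinates below are illustrative.
import numpy as np

ra = np.array([7.5, 187.5, 352.5])   # degrees
dec = np.array([-82.5, 0.5, 82.5])   # degrees
ra_width, dec_width = 15.0, 15.0

num_dec = int(180 / dec_width)
ra_idx = np.floor(ra / ra_width).astype(int)
dec_idx = np.floor((dec + 90.0) / dec_width).astype(int)
patch_ids = ra_idx * num_dec + dec_idx
print(patch_ids)  # [  0 150 287]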