From d019ed3f82ef690e35095a77978e14e9f711a778 Mon Sep 17 00:00:00 2001
From: gwm17 <gordonmccann215@gmail.com>
Date: Fri, 21 Feb 2025 17:39:19 -0500
Subject: [PATCH 1/4] Refactor of simulator, solver. Add option for selective
 sim.

---
 src/attpc_engine/detector/simulator.py   | 340 +++--------------------
 src/attpc_engine/detector/solver.py      | 161 +++++++++++
 src/attpc_engine/detector/transporter.py |   1 -
 3 files changed, 202 insertions(+), 300 deletions(-)

diff --git a/src/attpc_engine/detector/simulator.py b/src/attpc_engine/detector/simulator.py
index ca54efd..4c49496 100644
--- a/src/attpc_engine/detector/simulator.py
+++ b/src/attpc_engine/detector/simulator.py
@@ -1,12 +1,5 @@
 from .parameters import Config
-from .solver import (
-    equation_of_motion,
-    stop_condition,
-    forward_z_bound_condition,
-    backward_z_bound_condition,
-    rho_bound_condition,
-)
-from .transporter import transport_track
+from .solver import generate_point_cloud
 from .writer import SimulationWriter
 from .constants import NUM_TB
 from .. import nuclear_map
@@ -16,7 +9,6 @@
 import numpy as np
 import h5py as h5
 from tqdm import trange
-from scipy.integrate import solve_ivp
 from numpy.random import default_rng, Generator
 from pathlib import Path
 from numba.typed import Dict
@@ -24,105 +16,6 @@
 from numba import njit
 
 
-# Time steps to solve ODE at. Each step is 1e-10 s
-TIME_STEPS = np.linspace(0, 10e-7, 10001)
-
-
-class SimEvent:
-    """A simulated event from the kinematics pipeline.
-
-    Parameters
-    ----------
-    kinematics: numpy.ndarray
-        Nx4 array of four vectors of all N nuclei in the simulated event. From first
-        to last column are px, py, pz, E.
-    vertex: numpy.ndarray
-        The reaction vertex position in meters. Given as a 3-array formated
-        [x,y,z].
-    proton_numbers: numpy.ndarray
-        Nx1 array of proton numbers of all nuclei from reaction and decays in the pipeline.
-    mass_numbers: numpy.ndarray
-        Nx1 array of mass numbers of all nuclei from reaction and decasy in the pipeline.
-
-    Attributes
-    ----------
-    nuclei: list[SimParticle]
-        Instance of SimParticle for each nuclei in the exit channel of the pipeline.
-
-    Methods
-    -------
-    digitize(config)
-        Transform the kinematics into point clouds
-    """
-
-    def __init__(
-        self,
-        kinematics: np.ndarray,
-        vertex: np.ndarray,
-        proton_numbers: np.ndarray,
-        mass_numbers: np.ndarray,
-    ):
-        self.nuclei: list[SimParticle] = []
-
-        # Only simulate the nuclei in the exit channel
-        # Pattern is:
-        # skip target, projectile, take ejectile
-        # then skip every other. Append last nucleus as well
-        total_nuclei = len(kinematics) - 1
-        for idx in range(2, total_nuclei, 2):
-            if proton_numbers[idx] == 0:
-                continue
-            self.nuclei.append(
-                SimParticle(
-                    kinematics[idx], vertex, proton_numbers[idx], mass_numbers[idx]
-                )
-            )
-        if proton_numbers[-1] != 0:
-            self.nuclei.append(
-                SimParticle(
-                    kinematics[-1], vertex, proton_numbers[-1], mass_numbers[-1]
-                )
-            )
-
-    def digitize(self, config: Config, rng: Generator) -> np.ndarray:
-        """Transform the kinematics into point clouds
-
-        Digitizes the simulated event, converting the number of
-        electrons detected on each pad to a signal in ADC units.
-        The hardware ID of each pad is included to make the complete
-        AT-TPC trace.
-
-        Parameters
-        ----------
-        config: Config
-            The simulation configuration
-
-        Returns
-        -------
-        point_array: numpy.ndarray
-            An Nx3 array representing the point cloud. Each row is a point, with elements
-            [pad id, time bucket, electrons]
-        """
-        # Sum points from all particles
-        points = Dict.empty(key_type=types.int64, value_type=types.int64)
-        for nuc in self.nuclei:
-            nuc.generate_point_cloud(config, rng, points)
-
-        # Convert to numpy array of [pad, tb, e], now combined over pad/tb combos
-        point_array = dict_to_points(points)
-
-        # Wiggle point TBs over interval [0.0, 1.0). This simulates effect of converting
-        # the (in principle) int TBs to floats.
-        point_array[:, 1] += rng.uniform(low=0.0, high=1.0, size=len(point_array))
-
-        # Remove points outside legal bounds in time. TODO check if this is needed
-        point_array = point_array[
-            np.logical_and(0 <= point_array[:, 1], point_array[:, 1] < NUM_TB)
-        ]
-
-        return point_array
-
-
 @njit
 def dict_to_points(points: NumbaTypedDict[int, int]) -> np.ndarray:
     """
@@ -149,205 +42,52 @@ def dict_to_points(points: NumbaTypedDict[int, int]) -> np.ndarray:
     return point_array
 
 
-class SimParticle:
-    """
-    A nucleus in a simulated event
-
-    Parameters
-    ----------
-    data: numpy.ndarray
-        Simulated four vector of nucleus from kinematics.
-    vertex: numpy.ndarray
-        The reaction vertex position in meters. Given as a 3-array formated
-        [x,y,z].
-    proton_number: int
-        Number of protons in nucleus
-    mass_number: int
-        Number of nucleons in nucleus
-
-    Attributes
-    ----------
-    data: numpy.ndarray
-        Simulated four vector of nucleus from kinematics.
-    nucleus: spyral_utils.nuclear.NucleusData
-        Data of the simulated nucleus
-    vertex: numpy.ndarray
-        The reaction vertex position in meters. Given as a 3-array formated
-        [x,y,z].
-
-    Methods
-    ----------
-    generate_track()
-        Solves EoM for this nucleus in the AT-TPC
-    generate_electrons()
-        Finds the number of electrons produced at each point of the
-        simulated track
-    generate_point_cloud()
-        Makes the AT-TPC point cloud of this nucleus' track
-    """
-
-    def __init__(
-        self,
-        data: np.ndarray,
-        vertex: np.ndarray,
-        proton_number: int,
-        mass_number: int,
-    ):
-        self.data = data
-        self.nucleus = nuclear_map.get_data(proton_number, mass_number)
-        self.vertex = vertex
-
-    def generate_track(self, config: Config) -> np.ndarray:
-        """Solves EoM for this nucleus in the AT-TPC
-
-        Solution is evaluated over a fixed time interval.
-
-        Parameters
-        ----------
-        config: Config
-            The simulation configuration
-
-        Returns
-        -------
-        numpy.ndarray
-            Nx6 array where each row is a solution to one of the ODEs evaluated at
-            the Nth time step.
-        """
-        # Find initial state of nucleus. (x, y, z, px, py, pz)
-        initial_state: np.ndarray = np.zeros(6)
-        initial_state[:3] = self.vertex  # m
-        initial_state[3:] = self.data[:3] / self.nucleus.mass  # unitless (gamma * beta)
-
-        # Set ODE stop conditions. See SciPy solve_ivp docs
-        stop_condition.terminal = True
-        stop_condition.direction = -1.0
-        forward_z_bound_condition.terminal = True
-        forward_z_bound_condition.direction = 1.0
-        backward_z_bound_condition.terminal = True
-        backward_z_bound_condition.direction = -1.0
-        rho_bound_condition.terminal = True
-        rho_bound_condition.direction = 1.0
-
-        track = solve_ivp(
-            equation_of_motion,
-            (0.0, 1.0),  # Set upper time limit to 1 sec. This should never be reached
-            initial_state,
-            method="Radau",
-            events=[
-                stop_condition,
-                forward_z_bound_condition,
-                backward_z_bound_condition,
-                rho_bound_condition,
-            ],
-            t_eval=TIME_STEPS,
-            args=(
-                config.det_params.bfield * -1.0,
-                config.det_params.efield * -1.0,
-                config.det_params.gas_target,
-                self.nucleus,
-            ),
-        )
-
-        return track.y.T  # Return transpose to easily index by row
-
-    def generate_electrons(
-        self, config: Config, track: np.ndarray, rng: Generator
-    ) -> np.ndarray:
-        """Find the number of electrons made at each point of the nucleus' track.
-
-        Parameters
-        ----------
-        config: Config
-            The simulation configuration
-        track: numpy.ndarray
-            Nx6 array where each row is a solution to one of the ODEs evaluated at
-            the Nth time step.
-        rng: numpy.random.Generator
-            numpy random number generator
-
-        Returns
-        -------
-        numpy.ndarray
-            Returns an array of the number of electrons made at each
-            point in the trajectory
-        """
-        # Find energy of nucleus at each point of its track
-        gv = np.linalg.norm(track[:, 3:], axis=1)
-        beta = np.sqrt(gv**2.0 / (1.0 + gv**2.0))
-        gamma = gv / beta
-        energy = self.nucleus.mass * (gamma - 1.0)  # MeV
-
-        # Find number of electrons created at each point of its track
-        electrons = np.zeros_like(energy)
-        electrons[1:] = abs(np.diff(energy))  # Magnitude of energy lost
-        electrons *= (
-            1.0e6 / config.det_params.w_value
-        )  # Convert to eV to match units of W-value
-
-        # Adjust number of electrons by Fano factor, can only have integer amount of electrons
-        electrons = np.array(
-            [
-                rng.normal(point, np.sqrt(config.det_params.fano_factor * point))
-                for point in electrons
-            ],
-            dtype=np.int64,
-        )
-        return electrons
-
-    def generate_point_cloud(
-        self, config: Config, rng: Generator, points: NumbaTypedDict
-    ):
-        """Create the point cloud
-
-        Finds the pads hit by the electrons transported from each point
-        of the nucleus' trajectory to the pad plane.
-
-        Parameters
-        ----------
-        config: Config
-            The simulation configuration
-        rng: numpy.random.Generator
-            numpy random number generator
-        points: numba.typed.Dict[int, int]
-            A dictionary mapping a unique pad,tb key to the number of electrons.
-        """
-        # Generate nucleus' track and calculate the electrons made at each point
-        track = self.generate_track(config)
-        electrons = self.generate_electrons(config, track, rng)
-
-        # Remove points in trajectory that create less than 1 electron
-        mask = electrons >= 1
-        track = track[mask]
-        electrons = electrons[mask]
+def simulate(
+    momenta: np.ndarray,
+    vertex: np.ndarray,
+    proton_numbers: np.ndarray,
+    mass_numbers: np.ndarray,
+    config: Config,
+    rng: Generator,
+    indicies: list[int] | None,
+) -> np.ndarray:
+    nuclei_to_sim = None
+    if indicies is not None:
+        nuclei_to_sim = indicies
+    else:
+        # default nuclei to sim, all final outgoing particles
+        nuclei_to_sim = [idx for idx in range(2, len(proton_numbers), 2)]
+        nuclei_to_sim.append(len(proton_numbers) - 1)  # add the last
+
+    points = Dict.empty(key_type=types.int64, value_type=types.int64)
+
+    for idx in nuclei_to_sim:
+        if proton_numbers[idx] == 0:
+            continue
+        nucleus = nuclear_map.get_data(proton_numbers[idx], mass_numbers[idx])
+        momentum = momenta[idx]
+        generate_point_cloud(momentum, vertex, nucleus, config, rng, points)
 
-        # Apply gain factor from micropattern gas detectors
-        electrons *= config.det_params.mpgd_gain
+    # Convert to numpy array of [pad, tb, e], now combined over pad/tb combos
+    point_array = dict_to_points(points)
 
-        # Convert z position of trajectory to exact time buckets
-        dv = config.drift_velocity
-        track[:, 2] = (
-            config.det_params.length - track[:, 2]
-        ) / dv + config.elec_params.micromegas_edge
+    # Wiggle point TBs over interval [0.0, 1.0). This simulates effect of converting
+    # the (in principle) int TBs to floats.
+    point_array[:, 1] += rng.uniform(low=0.0, high=1.0, size=len(point_array))
 
-        if config.pad_grid_edges is None or config.pad_grid is None:
-            raise ValueError("Pad grid is not loaded at SimParticle.generate_hits!")
+    # Remove points outside legal bounds in time. TODO check if this is needed
+    point_array = point_array[
+        np.logical_and(0 <= point_array[:, 1], point_array[:, 1] < NUM_TB)
+    ]
 
-        transport_track(
-            config.pad_grid,
-            config.pad_grid_edges,
-            config.det_params.diffusion,
-            config.det_params.efield,
-            config.drift_velocity,
-            track,
-            electrons,
-            points,
-        )
+    return point_array
 
 
 def run_simulation(
     config: Config,
     input_path: Path,
     writer: SimulationWriter,
+    indicies: list[int] | None = None,
 ):
     """Run the simulation
 
@@ -398,7 +138,7 @@ def run_simulation(
         dataset: h5.Dataset = input_data_group[f"chunk_{chunk}"][  # type: ignore
             f"event_{event_number}"
         ]  # type: ignore
-        sim = SimEvent(
+        cloud = simulate(
             dataset[:].copy(),  # type: ignore
             np.array(
                 [
@@ -409,9 +149,11 @@ def run_simulation(
             ),
             proton_numbers,  # type: ignore
             mass_numbers,  # type: ignore
+            config,
+            rng,
+            indicies,
         )
 
-        cloud = sim.digitize(config, rng)
         if len(cloud) == 0:
             continue
 
diff --git a/src/attpc_engine/detector/solver.py b/src/attpc_engine/detector/solver.py
index 9666b44..aa7bd5a 100644
--- a/src/attpc_engine/detector/solver.py
+++ b/src/attpc_engine/detector/solver.py
@@ -1,12 +1,19 @@
 import math
 import numpy as np
+from numpy.random import Generator
+from scipy.integrate import solve_ivp
 
 from spyral_utils.nuclear.target import GasTarget
 from spyral_utils.nuclear import NucleusData
 
+from .parameters import Config, DetectorParams
+from .typed_dict import NumbaTypedDict
+from .transporter import transport_track
 from .constants import MEV_2_JOULE, MEV_2_KG, C, E_CHARGE
 
 KE_LIMIT = 1e-6  # 1 eV
+# Time steps to solve ODE at. Each step is 1e-10 s
+TIME_STEPS = np.linspace(0, 10e-7, 10001)
 
 
 def equation_of_motion(
@@ -231,3 +238,157 @@ def rho_bound_condition(
 
     """
     return float(np.linalg.norm(state[:2])) - 0.292
+
+
+def generate_trajectory(
+    vertex: np.ndarray,
+    momentum: np.ndarray,
+    nucleus: NucleusData,
+    params: DetectorParams,
+) -> np.ndarray:
+    """Solves EoM for a nucleus in the AT-TPC
+
+    Solution is evaluated over a fixed time interval.
+
+    Parameters
+    ----------
+    config: Config
+        The simulation configuration
+
+    Returns
+    -------
+    numpy.ndarray
+        Nx6 array where each row is a solution to one of the ODEs evaluated at
+        the Nth time step.
+    """
+    # Find initial state of nucleus. (x, y, z, px, py, pz)
+    initial_state: np.ndarray = np.zeros(6)
+    initial_state[:3] = vertex  # m
+    initial_state[3:] = momentum[:3] / nucleus.mass  # unitless (gamma * beta)
+
+    # Set ODE stop conditions. See SciPy solve_ivp docs
+    stop_condition.terminal = True
+    stop_condition.direction = -1.0
+    forward_z_bound_condition.terminal = True
+    forward_z_bound_condition.direction = 1.0
+    backward_z_bound_condition.terminal = True
+    backward_z_bound_condition.direction = -1.0
+    rho_bound_condition.terminal = True
+    rho_bound_condition.direction = 1.0
+
+    track = solve_ivp(
+        equation_of_motion,
+        (0.0, 1.0),  # Set upper time limit to 1 sec. This should never be reached
+        initial_state,
+        method="Radau",
+        events=[
+            stop_condition,
+            forward_z_bound_condition,
+            backward_z_bound_condition,
+            rho_bound_condition,
+        ],
+        t_eval=TIME_STEPS,
+        args=(
+            params.bfield * -1.0,
+            params.efield * -1.0,
+            params.gas_target,
+            nucleus,
+        ),
+    )
+
+    return track.y.T  # Return transpose to easily index by row
+
+
+def generate_electrons(
+    track: np.ndarray, nucleus: NucleusData, params: DetectorParams, rng: Generator
+):
+    """Find the number of electrons made at each point of the nucleus' track.
+
+    Parameters
+    ----------
+    config: Config
+        The simulation configuration
+    track: numpy.ndarray
+        Nx6 array where each row is a solution to one of the ODEs evaluated at
+        the Nth time step.
+    rng: numpy.random.Generator
+        numpy random number generator
+
+    Returns
+    -------
+    numpy.ndarray
+        Returns an array of the number of electrons made at each
+        point in the trajectory
+    """
+    # Find energy of nucleus at each point of its track
+    gv = np.linalg.norm(track[:, 3:], axis=1)
+    beta = np.sqrt(gv**2.0 / (1.0 + gv**2.0))
+    gamma = gv / beta
+    energy = nucleus.mass * (gamma - 1.0)  # MeV
+
+    # Find number of electrons created at each point of its track
+    electrons = np.zeros_like(energy)
+    electrons[1:] = abs(np.diff(energy))  # Magnitude of energy lost
+    electrons *= 1.0e6 / params.w_value  # Convert to eV to match units of W-value
+
+    # Adjust number of electrons by Fano factor, can only have integer amount of electrons
+    electrons = np.array(
+        [rng.normal(point, np.sqrt(params.fano_factor * point)) for point in electrons],
+        dtype=np.int64,
+    )
+    return electrons
+
+
+def generate_point_cloud(
+    momentum: np.ndarray,
+    vertex: np.ndarray,
+    nucleus: NucleusData,
+    config: Config,
+    rng: Generator,
+    points: NumbaTypedDict,
+) -> None:
+    """Create the point cloud
+
+    Finds the pads hit by the electrons transported from each point
+    of the nucleus' trajectory to the pad plane.
+
+    Parameters
+    ----------
+    config: Config
+        The simulation configuration
+    rng: numpy.random.Generator
+        numpy random number generator
+    points: numba.typed.Dict[int, int]
+        A dictionary mapping a unique pad,tb key to the number of electrons.
+    """
+    # Generate nucleus' track and calculate the electrons made at each point
+    track = generate_trajectory(vertex, momentum, nucleus, config.det_params)
+    electrons = generate_electrons(track, nucleus, config.det_params, rng)
+
+    # Remove points in trajectory that create less than 1 electron
+    mask = electrons >= 1
+    track = track[mask]
+    electrons = electrons[mask]
+
+    # Apply gain factor from micropattern gas detectors
+    electrons *= config.det_params.mpgd_gain
+
+    # Convert z position of trajectory to exact time buckets
+    dv = config.drift_velocity
+    track[:, 2] = (
+        config.det_params.length - track[:, 2]
+    ) / dv + config.elec_params.micromegas_edge
+
+    if config.pad_grid_edges is None or config.pad_grid is None:
+        raise ValueError("Pad grid is not loaded at SimParticle.generate_hits!")
+
+    transport_track(
+        config.pad_grid,
+        config.pad_grid_edges,
+        config.det_params.diffusion,
+        config.det_params.efield,
+        config.drift_velocity,
+        track,
+        electrons,
+        points,
+    )
diff --git a/src/attpc_engine/detector/transporter.py b/src/attpc_engine/detector/transporter.py
index e0f4773..65ec3a6 100644
--- a/src/attpc_engine/detector/transporter.py
+++ b/src/attpc_engine/detector/transporter.py
@@ -275,7 +275,6 @@ def find_pads_hit(
     # Point transport
     if sigma_t == 0.0:
         point_transport(pad_grid, grid_edges, time, center, electrons, points)
-
     # Transverse diffusion transport
     else:
         transverse_transport(

From c743bab61de2d78e92df361dbbd7e195fc6329c7 Mon Sep 17 00:00:00 2001
From: gwm17 <gordonmccann215@gmail.com>
Date: Mon, 24 Feb 2025 11:10:03 -0500
Subject: [PATCH 2/4] Implement labels in the output. Tested.

---
 src/attpc_engine/detector/simulator.py   | 66 +++++++++--------
 src/attpc_engine/detector/solver.py      |  2 +
 src/attpc_engine/detector/transporter.py | 90 ++++++++----------------
 src/attpc_engine/detector/writer.py      | 38 +++++-----
 tests/test_detector.py                   | 16 +++--
 5 files changed, 96 insertions(+), 116 deletions(-)

diff --git a/src/attpc_engine/detector/simulator.py b/src/attpc_engine/detector/simulator.py
index 4c49496..f3bf3b0 100644
--- a/src/attpc_engine/detector/simulator.py
+++ b/src/attpc_engine/detector/simulator.py
@@ -17,7 +17,9 @@
 
 
 @njit
-def dict_to_points(points: NumbaTypedDict[int, int]) -> np.ndarray:
+def dict_to_points(
+    points: NumbaTypedDict[int, tuple[int, int]],
+) -> tuple[np.ndarray, np.ndarray]:
     """
     Converts dictionary of N pad,tb keys with corresponding number of electrons
     to Nx3 array where each row is [pad, tb, e], now combined over pad/tb combos.
@@ -29,17 +31,19 @@ def dict_to_points(points: NumbaTypedDict[int, int]) -> np.ndarray:
 
     Returns
     -------
-    point_array: numpy.ndarray
-        Array of points.
+    tuple[numpy.ndarray, numpy.ndarray]
+        Array of points and lables (in that order)
     """
     point_array = np.empty((len(points), 3), dtype=float)
-    for idx, point in enumerate(points.items()):
-        tb, pad = unpair(point[0])
+    label_array = np.empty(len(points), dtype=types.int64)
+    for idx, (key, data) in enumerate(points.items()):
+        tb, pad = unpair(key)
         point_array[idx, 0] = pad
         point_array[idx, 1] = tb
-        point_array[idx, 2] = point[1]
+        point_array[idx, 2] = data[0]
+        label_array[idx] = data[1]
 
-    return point_array
+    return point_array, label_array
 
 
 def simulate(
@@ -49,38 +53,31 @@ def simulate(
     mass_numbers: np.ndarray,
     config: Config,
     rng: Generator,
-    indicies: list[int] | None,
-) -> np.ndarray:
-    nuclei_to_sim = None
-    if indicies is not None:
-        nuclei_to_sim = indicies
-    else:
-        # default nuclei to sim, all final outgoing particles
-        nuclei_to_sim = [idx for idx in range(2, len(proton_numbers), 2)]
-        nuclei_to_sim.append(len(proton_numbers) - 1)  # add the last
-
-    points = Dict.empty(key_type=types.int64, value_type=types.int64)
-
-    for idx in nuclei_to_sim:
+    indicies: list[int],
+) -> tuple[np.ndarray, np.ndarray]:
+    points = Dict.empty(
+        key_type=types.int64, value_type=types.Tuple(types=[types.int64, types.int64])
+    )
+    for idx in indicies:
         if proton_numbers[idx] == 0:
             continue
         nucleus = nuclear_map.get_data(proton_numbers[idx], mass_numbers[idx])
         momentum = momenta[idx]
-        generate_point_cloud(momentum, vertex, nucleus, config, rng, points)
+        generate_point_cloud(momentum, vertex, nucleus, config, rng, points, idx)
 
     # Convert to numpy array of [pad, tb, e], now combined over pad/tb combos
-    point_array = dict_to_points(points)
+    point_array, label_array = dict_to_points(points)
 
     # Wiggle point TBs over interval [0.0, 1.0). This simulates effect of converting
     # the (in principle) int TBs to floats.
     point_array[:, 1] += rng.uniform(low=0.0, high=1.0, size=len(point_array))
 
     # Remove points outside legal bounds in time. TODO check if this is needed
-    point_array = point_array[
-        np.logical_and(0 <= point_array[:, 1], point_array[:, 1] < NUM_TB)
-    ]
+    mask = np.logical_and(0 <= point_array[:, 1], point_array[:, 1] < NUM_TB)
+    point_array = point_array[mask]
+    label_array = label_array[mask]
 
-    return point_array
+    return point_array, label_array
 
 
 def run_simulation(
@@ -107,9 +104,18 @@ def run_simulation(
     print(f"Applying detector effects to kinematics from file: {input_path}")
     input = h5.File(input_path, "r")
     input_data_group: h5.Group = input["data"]  # type: ignore
-    proton_numbers = input_data_group.attrs["proton_numbers"]
+    proton_numbers: np.ndarray = input_data_group.attrs["proton_numbers"]  # type: ignore
     mass_numbers = input_data_group.attrs["mass_numbers"]
 
+    # Decide which nuclei to sim, either by user input or all reaction final products
+    nuclei_to_sim = None
+    if indicies is not None:
+        nuclei_to_sim = indicies
+    else:
+        # default nuclei to sim, all final outgoing particles
+        nuclei_to_sim = [idx for idx in range(2, len(proton_numbers), 2)]
+        nuclei_to_sim.append(len(proton_numbers) - 1)  # add the last
+
     n_events: int = input_data_group.attrs["n_events"]  # type: ignore
     miniters = int(0.01 * n_events)
     n_chunks: int = input_data_group.attrs["n_chunks"]  # type: ignore
@@ -138,7 +144,7 @@ def run_simulation(
         dataset: h5.Dataset = input_data_group[f"chunk_{chunk}"][  # type: ignore
             f"event_{event_number}"
         ]  # type: ignore
-        cloud = simulate(
+        cloud, labels = simulate(
             dataset[:].copy(),  # type: ignore
             np.array(
                 [
@@ -151,13 +157,13 @@ def run_simulation(
             mass_numbers,  # type: ignore
             config,
             rng,
-            indicies,
+            nuclei_to_sim,
         )
 
         if len(cloud) == 0:
             continue
 
-        writer.write(cloud, config, event_number)
+        writer.write(cloud, labels, config, event_number)
     writer.close()
     print("Done.")
     print("----------------------------------------")
diff --git a/src/attpc_engine/detector/solver.py b/src/attpc_engine/detector/solver.py
index aa7bd5a..a51b32f 100644
--- a/src/attpc_engine/detector/solver.py
+++ b/src/attpc_engine/detector/solver.py
@@ -346,6 +346,7 @@ def generate_point_cloud(
     config: Config,
     rng: Generator,
     points: NumbaTypedDict,
+    label: int,
 ) -> None:
     """Create the point cloud
 
@@ -391,4 +392,5 @@ def generate_point_cloud(
         track,
         electrons,
         points,
+        label,
     )
diff --git a/src/attpc_engine/detector/transporter.py b/src/attpc_engine/detector/transporter.py
index 65ec3a6..5037ee0 100644
--- a/src/attpc_engine/detector/transporter.py
+++ b/src/attpc_engine/detector/transporter.py
@@ -124,7 +124,8 @@ def point_transport(
     time: float,
     center: tuple[float, float],
     electrons: int,
-    points: NumbaTypedDict[int, int],
+    points: NumbaTypedDict[int, tuple[int, int]],
+    label: int,
 ):
     """
     Transports all electrons created at a point in a simulated nucleus' track
@@ -157,9 +158,9 @@ def point_transport(
     if pad != -1 and pad not in BEAM_PADS_ARRAY:
         tb = int(time)  # Convert from absolute time bucket to discretized
         id = pair(tb, pad)
-        points[id] = (
-            points.get(id, 0) + electrons
-        )  # The get returns 0 if the key doesn't exist
+        charge, _ = points.get(id, (0, 0))  # The get returns 0 if the key doesn't exist
+        charge += electrons
+        points[id] = (charge, label)
 
 
 @njit
@@ -170,7 +171,8 @@ def transverse_transport(
     center: tuple[float, float],
     electrons: int,
     sigma_t: float,
-    points: NumbaTypedDict[int, int],
+    points: NumbaTypedDict[int, tuple[int, int]],
+    label: int,
 ):
     """
     Transports all electrons created at a point in a simulated nucleus'
@@ -233,59 +235,9 @@ def transverse_transport(
                     * electrons
                 )
             )
-            points[id] = (
-                points.get(id, 0) + pixel_electrons
-            )  # The get returns 0 if the key doesn't exist
-
-
-@njit
-def find_pads_hit(
-    pad_grid: np.ndarray,
-    grid_edges: np.ndarray,
-    time: float,
-    center: tuple[float, float],
-    electrons: int,
-    sigma_t: float,
-    points: NumbaTypedDict[int, int],
-):
-    """
-    Finds the pads hit by transporting the electrons created at a point in
-    the nucleus' trajectory to the pad plane and applies transverse diffusion, if selected.
-
-    Parameters
-    ----------
-    pad_grid: numpy.ndarray
-        Grid of pad id for a given index, where index is calculated from x-y position
-    grid_edges: numpy.ndarray
-        Edges of the pad grid in mm, as well as the step size of the grid in mm
-        Allows conversion of position to grid index. 3 element array [low_edge, hi_edge, step]
-    time: float
-        Time of point being transported.
-    center: tuple[float, float]
-        (x,y) position of point being transported.
-    electrons: int
-        Number of electrons made at point being transported.
-    sigma_t: float
-        Standard deviation of transverse diffusion at point
-        being transported.
-    points: numba.typed.Dict[int, int]
-        A dictionary mapping a unique pad,tb key to the number of electrons, which
-        will be filled by this function
-    """
-    # Point transport
-    if sigma_t == 0.0:
-        point_transport(pad_grid, grid_edges, time, center, electrons, points)
-    # Transverse diffusion transport
-    else:
-        transverse_transport(
-            pad_grid,
-            grid_edges,
-            time,
-            center,
-            electrons,
-            sigma_t,
-            points,
-        )
+            charge, _ = points.get(id, (0, 0))
+            charge += pixel_electrons
+            points[id] = (charge, label)  # The get returns 0 if the key doesn't exist
 
 
 @njit
@@ -297,7 +249,8 @@ def transport_track(
     dv: float,
     track: np.ndarray,
     electrons: np.ndarray,
-    points: NumbaTypedDict[int, int],
+    points: NumbaTypedDict[int, tuple[int, int]],
+    label: int,
 ):
     """
     High-level function that transports each point in a nucleus' trajectory
@@ -333,6 +286,19 @@ def transport_track(
         center = (row[0], row[1])
         point_electrons = electrons[idx]
         sigma_t = np.sqrt(2.0 * diffusion * dv * time / efield)
-        find_pads_hit(
-            pad_grid, grid_edges, time, center, point_electrons, sigma_t, points
-        )
+        if sigma_t == 0.0:
+            point_transport(
+                pad_grid, grid_edges, time, center, point_electrons, points, label
+            )
+        # Transverse diffusion transport
+        else:
+            transverse_transport(
+                pad_grid,
+                grid_edges,
+                time,
+                center,
+                point_electrons,
+                sigma_t,
+                points,
+                label,
+            )
diff --git a/src/attpc_engine/detector/writer.py b/src/attpc_engine/detector/writer.py
index f60b4fb..97bdeda 100644
--- a/src/attpc_engine/detector/writer.py
+++ b/src/attpc_engine/detector/writer.py
@@ -23,7 +23,9 @@ class SimulationWriter(Protocol):
         Closes the writer.
     """
 
-    def write(self, data: np.ndarray, config: Config, event_number: int) -> None:
+    def write(
+        self, data: np.ndarray, labels: np.ndarray, config: Config, event_number: int
+    ) -> None:
         """
         Writes a simulated point cloud to the point cloud file.
 
@@ -83,8 +85,6 @@ def convert_to_spyral(
         (x, y) coordinates of each pad's center on the pad plane in mm.
     pad_sizes: np.ndarray
         Contains size of each pad.
-    adc_threshold: int
-        Minimum ADC signal amplitude a point must have in the point cloud.
 
     Returns
     -------
@@ -107,11 +107,7 @@ def convert_to_spyral(
         storage[idx, 6] = point[1]
         storage[idx, 7] = pad_sizes[int(point[0])]
 
-    if adc_threshold >= 4095:
-        raise ValueError(
-            "adc_threshold cannot be equal to or greater than the max GET ADC value!"
-        )
-    return storage[storage[:, 3] > adc_threshold]
+    return storage
 
 
 class SpyralWriter:
@@ -191,7 +187,9 @@ def create_next_file(self) -> None:
         self.file = h5.File(path, "w")
         self.cloud_group: h5.Group = self.file.create_group("cloud")
 
-    def write(self, data: np.ndarray, config: Config, event_number: int) -> None:
+    def write(
+        self, data: np.ndarray, labels: np.ndarray, config: Config, event_number: int
+    ) -> None:
         """
         Writes a simulated point cloud to the point cloud file.
 
@@ -224,21 +222,27 @@ def write(self, data: np.ndarray, config: Config, event_number: int) -> None:
             config.pad_sizes,
             config.elec_params.adc_threshold,
         )
+        # apply ADC threshold
+        mask = spyral_format[:, 3] > config.elec_params.adc_threshold
+        spyral_format = spyral_format[mask]
+        labels = labels[mask]
         # Make sure we're still sorted in z
         indicies = np.argsort(spyral_format[:, 2])
         spyral_format = spyral_format[indicies]
+        labels = labels[indicies]
 
-        dset = self.cloud_group.create_dataset(
+        pc_dset = self.cloud_group.create_dataset(
             f"cloud_{event_number}", data=spyral_format
         )
-
-        dset.attrs["orig_run"] = self.run_number
-        dset.attrs["orig_event"] = event_number
+        pc_dset.attrs["orig_run"] = self.run_number
+        pc_dset.attrs["orig_event"] = event_number
         # No ic stuff from simulation
-        dset.attrs["ic_amplitude"] = -1.0
-        dset.attrs["ic_multiplicity"] = -1.0
-        dset.attrs["ic_integral"] = -1.0
-        dset.attrs["ic_centroid"] = -1.0
+        pc_dset.attrs["ic_amplitude"] = -1.0
+        pc_dset.attrs["ic_multiplicity"] = -1.0
+        pc_dset.attrs["ic_integral"] = -1.0
+        pc_dset.attrs["ic_centroid"] = -1.0
+
+        _ = self.cloud_group.create_dataset(f"labels_{event_number}", data=labels)
 
         # We wrote an event
         self.events_written += 1
diff --git a/tests/test_detector.py b/tests/test_detector.py
index 1da208a..8faa24d 100644
--- a/tests/test_detector.py
+++ b/tests/test_detector.py
@@ -4,7 +4,7 @@
     PadParams,
     Config,
 )
-from attpc_engine.detector.simulator import SimEvent
+from attpc_engine.detector.simulator import simulate
 from attpc_engine import nuclear_map
 
 from spyral_utils.nuclear.target import GasTarget
@@ -45,17 +45,19 @@ def test_simulation_event():
     # all protons bby
     fake_data = np.array(
         [
-            [0.0, 0.0, 0.0, 938.0],
-            [0.0, 0.0, 0.0, 938.0],
-            [0.0, 0.0, 0.0, 938.0],
-            [0.0, 0.0, 0.0, 938.0],
+            [0.0, 0.0, 10.0, 938.0],
+            [0.0, 0.0, 10.0, 938.0],
+            [0.0, 0.0, 10.0, 938.0],
+            [0.0, 0.0, 10.0, 938.0],
         ]
     )
 
     proton_numbers = np.array([1, 1, 1, 1])
     mass_numbers = np.array([1, 1, 1, 1])
     vertex = np.array([1.0, 1.0, 1.0])
+    config = Config(detector, electronics, pads)
+    rng = np.random.default_rng()
 
-    event = SimEvent(fake_data, vertex, proton_numbers, mass_numbers)
+    event = simulate(fake_data, vertex, proton_numbers, mass_numbers, config, rng, [0])
 
-    assert len(event.nuclei) == 2
+    assert len(event) == 2

From 1d44a9f50a6a7b9488b6144d91634c3d290b183b Mon Sep 17 00:00:00 2001
From: gwm17 <gordonmccann215@gmail.com>
Date: Mon, 24 Feb 2025 11:26:29 -0500
Subject: [PATCH 3/4] Bump version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a4377a8..19fd732 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "attpc_engine"
-version = "0.7.0"
+version = "0.8.0"
 description = "AT-TPC Monte-Carlo simulation engine"
 authors = [
     {name = "Gordon McCann", email = "gordonmccann215@gmail.com"},

From bb54f3f845b5f4d590fd2479a5b27185fb567e9c Mon Sep 17 00:00:00 2001
From: gwm17 <gordonmccann215@gmail.com>
Date: Tue, 25 Feb 2025 11:58:18 -0500
Subject: [PATCH 4/4] Doc update. Docstrings and fix the example script.

---
 docs/user_guide/detector/index.md        | 161 +++++++++++++++++++----
 docs/user_guide/getting_started.md       |   2 +-
 src/attpc_engine/detector/simulator.py   |  57 ++++++--
 src/attpc_engine/detector/solver.py      |  45 +++++--
 src/attpc_engine/detector/transporter.py |  39 ++++--
 src/attpc_engine/detector/writer.py      |  59 +++++----
 6 files changed, 272 insertions(+), 91 deletions(-)

diff --git a/docs/user_guide/detector/index.md b/docs/user_guide/detector/index.md
index b11780f..96540b2 100644
--- a/docs/user_guide/detector/index.md
+++ b/docs/user_guide/detector/index.md
@@ -1,80 +1,185 @@
 # Detector system
 
-The detector system applies AT-TPC specific detector and electronics effects to every event in the input kinematics file, generated from the kinematics system, to simulate how they would look in the AT-TPC. It is a Monte-Carlo simulation that aims to capture the overarching behavior of the detector and its electronics, not mimic their exact behavior which is horrendously complicated and not completely understood. The detector system code is based on code developed by Daniel Bazin originally in Igor Pro and includes effects pointed out by Adam Anthony in his [thesis](https://ezproxy.msu.edu/login?url=https://www.proquest.com/pqdtglobal1/dissertations-theses/fission-lead-region/docview/2855740534/sem-2?accountid=12598).
+The detector system applies AT-TPC specific detector and electronics effects to every 
+event in the input kinematics file, generated from the kinematics system, to simulate 
+how they would look in the AT-TPC. It is a Monte-Carlo simulation that aims to capture 
+the overarching behavior of the detector and its electronics, not mimic their exact 
+behavior which is horrendously complicated and not completely understood. The detector 
+system code is based on code developed by Daniel Bazin originally in Igor Pro and 
+includes effects pointed out by Adam Anthony in his [thesis](https://ezproxy.msu.edu/login?url=https://www.proquest.com/pqdtglobal1/dissertations-theses/fission-lead-region/docview/2855740534/sem-2?accountid=12598).
 
 ## Core steps of detector system
 
-The detector system code simulates an event by going through a series of steps to ultimately produce its point cloud. For each event, the detector system:
+The detector system code simulates an event by going through a series of steps to 
+ultimately produce its point cloud. For each event, the detector system:
 
 1. Generate the trajectory of the each nucleus in the exit channel of the event by solving its equation of motion in the AT-TPC with a fine grained-timestep
 2. Determines how many electrons are created at each point of each nucleus' trajectory
 3. Converts the z coordinate of each point in each trajectory to a GET electronics sampling-frequency based Time Bucket using the electron drift velocity
 3. Transports each point of the nucleus' trajectory to the pad plane, applying diffusion if requested, to identify the sensing pad for each timestep
-5. Form the point cloud from all of the trajectories
-4. Write the point cloud to disk
+5. Form the point cloud from all of the trajectories, and label each point with which nucleus produced it
+4. Write the point cloud and labels to disk
 
 We can now describe each step in greater detail.
 
 ## Track simulation
 
-The AT-TPC contains an inherent electric field parallel to the beam axis. It is also usually placed inside a solenoid that provides a magnetic field in the same direction. This means that a charged particle in the AT-TPC has an equation of motion given by the relativistic Lorentz force
+The AT-TPC contains an inherent electric field parallel to the beam axis. It is also 
+usually placed inside a solenoid that provides a magnetic field in the same direction. 
+This means that a charged particle in the AT-TPC has an equation of motion given by the
+relativistic Lorentz force
 
 $$ 
 \frac{d\pmb{p}}{dt} = q (\pmb{E} + \pmb{v} \times \pmb{B})
 $$
 
-where $\pmb{p}$ is the relativistic momentum, $\pmb{v}$ is the three velocity, $\pmb{E}$ is the electric field, and $\pmb{B}$ is the magnetic field. To prevent the ODE solver from calculating either very large or very small numbers, we actually divide both sides of this equation by the nucleus' rest mass and thus solve this differential equation for $\gamma \beta$. 
+where $\pmb{p}$ is the relativistic momentum, $\pmb{v}$ is the three velocity, $\pmb{E}$
+is the electric field, and $\pmb{B}$ is the magnetic field. To prevent the ODE solver 
+from calculating either very large or very small numbers, we actually divide both 
+sides of this equation by the nucleus' rest mass and thus solve this differential 
+equation for $\gamma \beta$. 
 
-The equation of motion is subject to the physical boundaries of the detector and the condition that the nucleus has more than 1 eV of kinetic energy. Solutions are provided by the solver at time steps of 1e-10 s.
+The equation of motion is subject to the physical boundaries of the detector and the 
+condition that the nucleus has more than 1 eV of kinetic energy. Solutions are provided 
+by the solver at time steps of 1e-10 s.
 
 ## Electron creation
 
-Using $\gamma \beta$ provided by the ODE solver at each time step, the number of electrons created through ionization of the gas by the nucleus can be calculated. $\gamma \beta$ is used to find $\beta$ through manipulation of the $\gamma$ factor equation. $\gamma \beta$ is divided by $\beta$ to uncouple $\gamma$ which is then used in the relativistic kinetic energy formula to find the kinetic energy of the nucleus at that point. The difference in energy between two successive points in the trajectory is defined as the energy lost by the nucleus at the point at the later time.
-
-With knowledge of the energy lost by the nucleus at the points in its trajectory, the electrons made can be found. The input W-value is the average energy needed to create an electron-ion pair in the gas, thus the energy lost at each point is divided by this value to find the electrons made. However, ionization is part statistical, so this number is taken as the mean of a normal distribution that is then sampled to find the actual number of electrons at each point. We say part statistical because it has been experimentally shown that the amount of electrons made does not follow purely statistical calculations. Therefore, the input Fano factor of the gas is a multiplicative constant that modifies the mean number of electrons made at each point in an attempt to capture the non-statistical nature of electron-ion pair creation in the gas.
-
-Only an integer amount of electrons can be made. Trunction is applied and points with less than one electron made are removed from the track.
+Using $\gamma \beta$ provided by the ODE solver at each time step, the number of 
+electrons created through ionization of the gas by the nucleus can be calculated. 
+$\gamma \beta$ is used to find $\beta$ through manipulation of the $\gamma$ factor 
+equation. $\gamma \beta$ is divided by $\beta$ to uncouple $\gamma$ which is then used 
+in the relativistic kinetic energy formula to find the kinetic energy of the nucleus at
+that point. The difference in energy between two successive points in the trajectory is
+defined as the energy lost by the nucleus at the point at the later time.
+
+With knowledge of the energy lost by the nucleus at the points in its trajectory, the 
+electrons made can be found. The input W-value is the average energy needed to create 
+an electron-ion pair in the gas, thus the energy lost at each point is divided by this 
+value to find the electrons made. However, ionization is part statistical, so this 
+number is taken as the mean of a normal distribution that is then sampled to find the 
+actual number of electrons at each point. We say part statistical because it has been 
+experimentally shown that the amount of electrons made does not follow purely 
+statistical calculations. Therefore, the input Fano factor of the gas is a 
+multiplicative constant that modifies the mean number of electrons made at each point 
+in an attempt to capture the non-statistical nature of electron-ion pair creation in 
+the gas.
+
+Only an integer amount of electrons can be made. Trunction is applied and points with 
+less than one electron made are removed from the track.
 
 ## Convert z position to time buckets
 
-The GET electronics records information in discrete time intervals called time buckets. The trajectory of the nucleus returned from the ODE solver is described by three Cartesian coordinates. The z coordinate of each point then must be converted to time buckets. The formula to convert the z coordinate $z_{coord}$ to time buckets $z_{tb}$ is given by 
+The GET electronics records information in discrete time intervals called time buckets. 
+The trajectory of the nucleus returned from the ODE solver is described by three 
+Cartesian coordinates. The z coordinate of each point then must be converted to time 
+buckets. The formula to convert the z coordinate $z_{coord}$ to time buckets $z_{tb}$ 
+is given by 
 
 $$ 
 z_{tb} = \frac{l - z_{coord}}{v_{e}} + m_{tb}
 $$
 
-where $l$ is the length of the AT-TPC active volume, $v_{e}$ is the electron drift velocity, and $m_{tb}$ is the micromegas time bucket. 
+where $l$ is the length of the AT-TPC active volume, $v_{e}$ is the electron drift 
+velocity, and $m_{tb}$ is the micromegas time bucket. 
 
-Two important points are made about this equation. First, $z_{coord}$ is subtracted from $l$. This is because the coordinate system used has the AT-TPC window at $z=0$ and the micromegas at $z=l$ and time buckets are recorded with respect to the micromegas (electrons drift towards it). Second, the resulting time bucket has $m_{tb}$ added to it. This is due to the fact that the micromegas edge of the detector does not necessarily correspond to a time bucket of zero. A similar statement can be made for the window edge of the detector; it does not necessarily correspond to the maximum recordable time bucket.
+Two important points are made about this equation. First, $z_{coord}$ is subtracted 
+from $l$. This is because the coordinate system used has the AT-TPC window at $z=0$ and
+the micromegas at $z=l$ and time buckets are recorded with respect to the micromegas 
+(electrons drift towards it). Second, the resulting time bucket has $m_{tb}$ added to 
+it. This is due to the fact that the micromegas edge of the detector does not 
+necessarily correspond to a time bucket of zero. A similar statement can be made for 
+the window edge of the detector; it does not necessarily correspond to the maximum 
+recordable time bucket.
 
 ## Point cloud construction
 
-A point cloud is an Nx3 matrix, where each row corresponds to a point containing the following information: sensing pad number, detection time in GET Time Buckets, and the number of electrons detected. Point cloud construction primarily deals with determining the first two from the calculated points in the nucleus' trajectory. There are two ways to create a point cloud, and we start by describing the simple case of no transverse diffusion.
-
-When the transverse diffusion is specified to be zero, point clouds are constructed from what we call "point transport" where the trajectory is projected onto the pad plane and the pad that each point hits is found from a lookup table. In this scheme, the electrons are not spread and hence the number of electrons that hit each pad is trivially the number of electrons at each point of the trajectory.
-
-When the transverse diffusion is non-zero, point clouds are constructed from the more involved "transverse transport" scheme. The electrons made at each point are smeared from a bivariate normal distribution over the pad plane. The standard deviation $\sigma$ of the bivariate distribution is the same for both the x and y directions and is given by
+A point cloud is an Nx3 matrix, where each row corresponds to a point containing the 
+following information: sensing pad number, detection time in GET Time Buckets, and the 
+number of electrons detected. Point cloud construction primarily deals with determining
+the first two from the calculated points in the nucleus' trajectory. There are two ways
+to create a point cloud, and we start by describing the simple case of no transverse 
+diffusion.
+
+When the transverse diffusion is specified to be zero, point clouds are constructed 
+from what we call "point transport" where the trajectory is projected onto the pad 
+plane and the pad that each point hits is found from a lookup table. In this scheme, 
+the electrons are not spread and hence the number of electrons that hit each pad is 
+trivially the number of electrons at each point of the trajectory.
+
+When the transverse diffusion is non-zero, point clouds are constructed from the more 
+involved "transverse transport" scheme. The electrons made at each point are smeared 
+from a bivariate normal distribution over the pad plane. The standard deviation 
+$\sigma$ of the bivariate distribution is the same for both the x and y directions and 
+is given by
 
 $$
 \sigma = \sqrt{2 D v_{e} t / E}
 $$
 
-where D is the diffusion coefficient, $t$ is the average electron drift time ($z_{tb}$), and $E$ is the magnitude of the electric field. See [Peisert and Sauli](https://cds.cern.ch/record/154069?ln=en) for the derivation of this equation. The smearing is done via a discretized grid and the same lookup table is used to find which pad each pixel hits.
+where D is the diffusion coefficient, $t$ is the average electron drift time 
+($z_{tb}$), and $E$ is the magnitude of the electric field. See 
+[Peisert and Sauli](https://cds.cern.ch/record/154069?ln=en) for the derivation of this
+equation. The smearing is done via a discretized grid and the same lookup table is used
+to find which pad each pixel hits.
 
-Despite the transport schema chosen, each point in the cloud has its corresponding time bucket. This time bucket is truncated to an integer from the exact time bucket found in the previous step because the GET system records in discrete time samples.
+Despite the transport schema chosen, each point in the cloud has its corresponding time
+bucket. This time bucket is truncated to an integer from the exact time bucket found in
+the previous step because the GET system records in discrete time samples.
 
-In this simulation, we only allow for transverse diffusion. Although longitudinal diffusion exists, it is not included. The reason is that it was found in early iterations of this simulation to not matter. Only for large longitudinal diffusion values noticeable effects were observed.
+In this simulation, we only allow for transverse diffusion. Although longitudinal 
+diffusion exists, it is not included. The reason is that it was found in early 
+iterations of this simulation to not matter. Only for large longitudinal diffusion 
+values noticeable effects were observed.
+
+## Point cloud labels
+
+Each point cloud also has an associated length N label array of per-point labels. These
+labels indicate which nucleus produced the specific point in the cloud. The labels are 
+given by the index of the nucleus in the kinematics list. For example, in a simple one 
+step reaction a(b,c)d a=0, b=1, c=2, d=3. In a two step, a(b,c)d->e+f, e=4, f=5, and so
+on for more complex scenarios. These labels are particularly useful for evaluating the
+performance of machine learning methods like clustering in downstream analyses.
 
 ## Why Point clouds
 
-It is worth elaborating why the detector system outputs point clouds instead of raw traces. Using raw traces would allow for the full testing of AT-TPC analysis beginning with the signal analysis. As with any Monte Carlo simulation, attpc_engine needs enough samples to converge upon a meaningful result. To this end, it is often desireable to simulate a large numbber (more than 100,000) of events in the AT-TPC. Traces require such a large memory footprint, both on disk and in memory, that they are prohibitive to simulating enough samples to reach convergence. Additionally, methods which effectively mimic the True response of the AT-TPC pad plane and electronics are often dubious, relying on approximations that are hard to validate.
+It is worth elaborating why the detector system outputs point clouds instead of raw 
+traces. Using raw traces would allow for the full testing of AT-TPC analysis beginning 
+with the signal analysis. As with any Monte Carlo simulation, attpc_engine needs enough
+samples to converge upon a meaningful result. To this end, it is often desireable to
+simulate a large numbber (more than 100,000) of events in the AT-TPC. Traces require 
+such a large memory footprint, both on disk and in memory, that they are prohibitive to
+simulating enough samples to reach convergence. Additionally, methods which effectively
+mimic the True response of the AT-TPC pad plane and electronics are often dubious, 
+relying on approximations that are hard to validate.
 
 ## Writing the Point cloud to disk
 
-The final step of the process is to write the simulated detector response to disk. attpc_engine aims to be analysis agnostic, and as such does not fomrally define the format of the output. Instead, attpc_engine provides a `SimulationWriter` [protocol](https://typing.readthedocs.io/en/latest/spec/protocol.html#protocols) class. Users can define their own writer, so long as it implements the methods required by the `SimulationWriter` protocol.
+The final step of the process is to write the simulated detector response and labels to disk. 
+attpc_engine aims to be analysis agnostic, and as such does not fomrally define the 
+format of the output. Instead, attpc_engine provides a `SimulationWriter` 
+[protocol](https://typing.readthedocs.io/en/latest/spec/protocol.html#protocols) class.
+Users can define their own writer, so long as it implements the methods required by the 
+`SimulationWriter` protocol.
 
 ### Use with Spyral
 
-For convience, attpc_engine includes a `SpyralWriter`, which will output a HDF5 file formatted to mimic [Spyral](https://attpc.github.io/Spyral), the AT-TPC group-supported analysis framework. `SpyralWriter` also includes the useful feature of splitting the output into multiple files based on number of events, which can help take advantage of parallel analysis pipelines. Note that attpc_engine does not claim to provide values in the integral or amplitude fields of a Spyral point cloud that will match real data; `SpyralWriter` use the [theoretical response function](https://www.sciencedirect.com/science/article/pii/S0168900216309408?casa_token=DAWbhPvX49MAAAAA:Zen7nFs-pgG9wu__g0rrzus01B1pa7ZYspbz_KOnn-dZyhXNglEqtgywFKwYrvKKsIwEY10n49XV) of the GET electronics to make an attempt to approximate the amplitude and integral, but this should be regarded as dubious at best.
-
-Given that attpc_engine outputs point clouds, the results should not be run through the PointcloudPhase, rather starting directly with the ClusterPhase. Note that because Spyral expects trace data, one will need to "fake" the trace datapath. The simplest way to do this is to point the trace path to the Pointcloud directory of the workspace. Just be sure not to run the PointcloudPhase in this case!
+For convience, attpc_engine includes a `SpyralWriter`, which will output a HDF5 file 
+formatted to mimic [Spyral](https://attpc.github.io/Spyral), the AT-TPC group-supported
+analysis framework. `SpyralWriter` also includes the useful feature of splitting the 
+output into multiple files based on number of events, which can help take advantage of 
+parallel analysis pipelines. Note that attpc_engine does not claim to provide values in
+the integral or amplitude fields of a Spyral point cloud that will match real data; 
+`SpyralWriter` use the [theoretical response function](https://www.sciencedirect.com/science/article/pii/S0168900216309408?casa_token=DAWbhPvX49MAAAAA:Zen7nFs-pgG9wu__g0rrzus01B1pa7ZYspbz_KOnn-dZyhXNglEqtgywFKwYrvKKsIwEY10n49XV)
+of the GET electronics to make an attempt to approximate the amplitude and integral, 
+but this should be regarded as dubious at best.
+
+Given that attpc_engine outputs point clouds, the results should not be run through the
+PointcloudPhase, rather starting directly with the ClusterPhase. Note that because 
+Spyral expects trace data, one will need to "fake" the trace datapath. The simplest 
+way to do this is to point the trace path to the Pointcloud directory of the workspace.
+Just be sure not to run the PointcloudPhase in this case!
+
+Along with the normal point cloud data, the writer also writes the associated labels
+under the same "cloud" group. Each label array is named "label_#" where # is the event
+number for that label array.
diff --git a/docs/user_guide/getting_started.md b/docs/user_guide/getting_started.md
index 29287ee..8b26230 100644
--- a/docs/user_guide/getting_started.md
+++ b/docs/user_guide/getting_started.md
@@ -143,7 +143,7 @@ from attpc_engine.detector import (
 )
 
 from attpc_engine import nuclear_map
-from spyral_utils.nuclear.target import TargetData, GasTarget
+from spyral_utils.nuclear.target import load_target, GasTarget
 from pathlib import Path
 
 input_path = Path("./output/kinematics/c16dd_d2_300Torr_184MeV.h5")
diff --git a/src/attpc_engine/detector/simulator.py b/src/attpc_engine/detector/simulator.py
index f3bf3b0..88650cc 100644
--- a/src/attpc_engine/detector/simulator.py
+++ b/src/attpc_engine/detector/simulator.py
@@ -20,19 +20,22 @@
 def dict_to_points(
     points: NumbaTypedDict[int, tuple[int, int]],
 ) -> tuple[np.ndarray, np.ndarray]:
-    """
+    """Convert a point dictionary to a pointcloud array
+
     Converts dictionary of N pad,tb keys with corresponding number of electrons
     to Nx3 array where each row is [pad, tb, e], now combined over pad/tb combos.
+    Also returns a length N array which contains labels (particle index) for each
+    point.
 
     Parameters
     ----------
-    points: numba.typed.Dict[int, int]
-        A dictionary mapping a unique pad,tb key to the number of electrons.
+    points: numba.typed.Dict[int, tuple[int,int]]
+        A dictionary mapping a unique pad,tb key to the number of electrons and a label
 
     Returns
     -------
     tuple[numpy.ndarray, numpy.ndarray]
-        Array of points and lables (in that order)
+        Array of points and labels (in that order)
     """
     point_array = np.empty((len(points), 3), dtype=float)
     label_array = np.empty(len(points), dtype=types.int64)
@@ -55,6 +58,38 @@ def simulate(
     rng: Generator,
     indicies: list[int],
 ) -> tuple[np.ndarray, np.ndarray]:
+    """Apply detector simulation to a kinematics event
+
+    Takes in the data from a sample from the kinematics phase space
+    and applies detector effects to that sample, generating a pointcloud with
+    labels.
+
+    Parameters
+    ----------
+    momenta: numpy.ndarray
+        The 4-momenta of the nuclei in the reaction
+    vertex: numpy.ndarray
+        The reaction vertex
+    proton_numbers: numpy.ndarray
+        The proton number for each nucleus in the reaction
+    mass_numbers: numpy.ndarray
+        The mass number for each nucleus in the reaction
+    config: Config
+        The detector simulation parameters
+    rng: numpy.random.Generator
+        A random number generator
+    indicies: list[int]
+        The indicies in the list of nuclei which should be simulated.
+        Typically this would be all final products of the reaction
+
+    Returns
+    -------
+    tuple[numpy.ndarray, numpy.ndarray]
+        Returns two arrays. The first is a point cloud of Nx3 shape, where N is
+        the number of points and each point contains a pad ID, time bucket, and
+        electrons deposited. The second is an array of length N, where each entry
+        is the index of the nucleus which generated the point.
+    """
     points = Dict.empty(
         key_type=types.int64, value_type=types.Tuple(types=[types.int64, types.int64])
     )
@@ -86,19 +121,25 @@ def run_simulation(
     writer: SimulationWriter,
     indicies: list[int] | None = None,
 ):
-    """Run the simulation
+    """Run the detector simulation
 
-    Runs the AT-TPC simulation with the input parameters on the specified
-    kinematic data hdf5 file generated by the kinematic pipeline.
+    Runs the AT-TPC detector simulation with the input parameters on the specified
+    kinematic data hdf5 file generated by the kinematic simulation.
 
     Parameters
-     ----------
+    ----------
     config: Config
         The simulation configuration
     input_path: pathlib.Path
         Path to HDF5 file containing kinematics
     writer: SimulationWriter
         An object which implements the SimulationWriter Protocol
+    indicies: list[int] | None
+        List of which nuclei to include in the detector simulation. Nuclei are
+        specified by index of which they occur in the kinematic arrays. For example,
+        in a simple one step reaction, a(b,c)d 0=a, 1=b, 2=c, 3=d. For two step
+        a(b,c)d->e+f e=4, f=5, and so on. If the list is None, all final reaction
+        products are simulated.
     """
     print("------- AT-TPC Simulation Engine -------")
     print(f"Applying detector effects to kinematics from file: {input_path}")
diff --git a/src/attpc_engine/detector/solver.py b/src/attpc_engine/detector/solver.py
index a51b32f..ae20c07 100644
--- a/src/attpc_engine/detector/solver.py
+++ b/src/attpc_engine/detector/solver.py
@@ -38,9 +38,9 @@ def equation_of_motion(
         the magnitude of the magnetic field
     efield: float
         the magnitude of the electric field
-    target: Target
+    target: spyral_utils.nuclear.GasTarget
         the material through which the particle travels
-    ejectile: NucleusData
+    ejectile: spyral_utils.nuclear.NucleusData
         ejectile (from the reaction) nucleus data
 
     Returns
@@ -225,7 +225,7 @@ def rho_bound_condition(
         the magnitude of the magnetic field, unused
     efield: float
         the magnitude of the electric field, unused
-    target: Target
+    target: spyral_utils.nuclear.GasTarget
         the material through which the particle travels, unused
     ejectile: NucleusData
         ejectile (from the reaction) nucleus data, unused
@@ -252,8 +252,14 @@ def generate_trajectory(
 
     Parameters
     ----------
-    config: Config
-        The simulation configuration
+    vertex: numpy.ndarray
+        The reaction vertex
+    momentum: numpy.ndarray
+        The inital 4-momentum of the nucleus
+    nucleus: spyral_utils.nuclear.NucleusData
+        The particle species charge, mass, etc.
+    params: DetectorParams
+        Parameters defining detector shape, fields, material, etc.
 
     Returns
     -------
@@ -306,11 +312,13 @@ def generate_electrons(
 
     Parameters
     ----------
-    config: Config
-        The simulation configuration
     track: numpy.ndarray
         Nx6 array where each row is a solution to one of the ODEs evaluated at
         the Nth time step.
+    nucleus: spyral_utils.nuclear.NucleusData
+        The particle species charge, mass, etc.
+    params: DetectorParams
+        Parameters defining detector geometry, fields, material, etc.
     rng: numpy.random.Generator
         numpy random number generator
 
@@ -345,22 +353,31 @@ def generate_point_cloud(
     nucleus: NucleusData,
     config: Config,
     rng: Generator,
-    points: NumbaTypedDict,
+    points: NumbaTypedDict[int, tuple[int, int]],
     label: int,
 ) -> None:
-    """Create the point cloud
+    """Create a point cloud from a given particle
 
-    Finds the pads hit by the electrons transported from each point
-    of the nucleus' trajectory to the pad plane.
+    Generates the trajectory of a particle, propagates electron transport,
+    and evaluates which pad on the AT-TPC detector plane the signal is recorded.
 
     Parameters
     ----------
+    momentum: numpy.ndarray
+        The particle 4-momentum
+    vertex: numpy.ndarray
+        The reaction vertex
+    nucleus: spyral_utils.nuclear.NucleusData
+        The particle species charge, mass, etc.
     config: Config
         The simulation configuration
     rng: numpy.random.Generator
         numpy random number generator
-    points: numba.typed.Dict[int, int]
-        A dictionary mapping a unique pad,tb key to the number of electrons.
+    points: numba.typed.Dict[int, tuple[int,int]]
+        A dictionary mapping a unique pad,tb key to the number of electrons and a label.
+        The points created by this nucleus are stored here.
+    label: int
+        The label for points generated by this nucleus.
     """
     # Generate nucleus' track and calculate the electrons made at each point
     track = generate_trajectory(vertex, momentum, nucleus, config.det_params)
@@ -381,7 +398,7 @@ def generate_point_cloud(
     ) / dv + config.elec_params.micromegas_edge
 
     if config.pad_grid_edges is None or config.pad_grid is None:
-        raise ValueError("Pad grid is not loaded at SimParticle.generate_hits!")
+        raise ValueError("Pad grid is not loaded at generate_point_cloud!")
 
     transport_track(
         config.pad_grid,
diff --git a/src/attpc_engine/detector/transporter.py b/src/attpc_engine/detector/transporter.py
index 5037ee0..35ed1c9 100644
--- a/src/attpc_engine/detector/transporter.py
+++ b/src/attpc_engine/detector/transporter.py
@@ -12,7 +12,8 @@
 def bivariate_normal_pdf(
     point: tuple[float, float], mu: tuple[float, float], sigma: float
 ) -> float:
-    """
+    """Sample a bivariate normal pdf
+
     Equation of PDF corresponding to a 2D normal distribution where sigma is the same
     for both x and y and there is no correlation between the two variables.
 
@@ -42,7 +43,8 @@ def bivariate_normal_pdf(
 
 @njit
 def meshgrid(xarr: np.ndarray, yarr: np.ndarray) -> np.ndarray:
-    """
+    """JIT-ed implementation of meshgrid
+
     Creates a rectangular (x, y) grid from the input arrays and
     returns a Nx2 array where each row is a point in the mesh.
 
@@ -77,7 +79,8 @@ def meshgrid(xarr: np.ndarray, yarr: np.ndarray) -> np.ndarray:
 def position_to_index(
     grid_edges: np.ndarray, position: tuple[float, float]
 ) -> tuple[int, int]:
-    """
+    """Convert a position to pad map index
+
     Given an input position in (x, y), outputs the index on the pad map
     corresponding to that position. For information about the format of
     the pad map, see the load_pad_grid method of the Config class in
@@ -127,7 +130,8 @@ def point_transport(
     points: NumbaTypedDict[int, tuple[int, int]],
     label: int,
 ):
-    """
+    """Transport electrons without diffusion
+
     Transports all electrons created at a point in a simulated nucleus' track
     straight to the pad plane.
 
@@ -144,9 +148,11 @@ def point_transport(
         (x,y) position of point being transported.
     electrons: int
         Number of electrons made at point being transported.
-    points: numba.typed.Dict[int, int]
+    points: numba.typed.Dict[int, tuple[int, int]]
         A dictionary mapping a unique pad,tb key to the number of electrons, which
         will be filled by this function
+    label: int
+        The label for all points created in this call
     """
     # Find pad number of hit pad, if it exists
     index_x, index_y = position_to_index(grid_edges, center)
@@ -174,7 +180,8 @@ def transverse_transport(
     points: NumbaTypedDict[int, tuple[int, int]],
     label: int,
 ):
-    """
+    """Transport electrons with transverse diffusion
+
     Transports all electrons created at a point in a simulated nucleus'
     track to the pad plane with transverse diffusion applied. This is done by
     creating a square mesh roughly centered on the point where the electrons are
@@ -201,9 +208,11 @@ def transverse_transport(
     sigma_t: float
         Standard deviation of transverse diffusion at point
         being transported.
-    points: numba.typed.Dict[int, int]
-        A dictionary mapping a unique pad,tb key to the number of electrons, which
-        will be filled by this function
+    points: numba.typed.Dict[int, tuple[int, int]]
+        A dictionary mapping a unique pad,tb key to the number of electrons and  label,
+        which will be filled by this function
+    label: int
+        The label for all points in this call
     """
     mesh_centerx: float = center[0]
     mesh_centery: float = center[1]
@@ -252,7 +261,8 @@ def transport_track(
     points: NumbaTypedDict[int, tuple[int, int]],
     label: int,
 ):
-    """
+    """Transport electrons generated by a trajectory to the AT-TPC pad plane
+
     High-level function that transports each point in a nucleus' trajectory
     to the pad plane, applying transverse diffusion if specified.
 
@@ -275,9 +285,12 @@ def transport_track(
         Nx6 array where each row is a solution to one of the ODEs evaluated at
         the Nth time step.
     electrons: np.ndarray
-        1xN array of electrons created each time step (point) of the trajectory.
-    points: numba.typed.Dict[int, int]
-        A dictionary mapping a unique pad,tb key to the number of electrons.
+        Length N array of electrons created each time step (point) of the trajectory.
+    points: numba.typed.Dict[int, tuple[int, int]]
+        A dictionary mapping a unique pad,tb key to the number of electrons and a label,
+        which will be filled by this function.
+    label: int
+        The label for all points generated by this call
     """
 
     # Each point is a TB/pad combo in the TPC
diff --git a/src/attpc_engine/detector/writer.py b/src/attpc_engine/detector/writer.py
index 97bdeda..a079e24 100644
--- a/src/attpc_engine/detector/writer.py
+++ b/src/attpc_engine/detector/writer.py
@@ -15,25 +15,27 @@ class SimulationWriter(Protocol):
 
     Methods
     -------
-    write(data: np.ndarray, config: Config, event_number: int) -> None
+    write(data, labels, config, event_number)
         Writes a simulated point cloud to the point cloud file.
-    get_filename() -> Path
+    get_directory_name()
         Returns directory that point cloud files are written to.
-    close() -> None
+    close()
         Closes the writer.
     """
 
     def write(
         self, data: np.ndarray, labels: np.ndarray, config: Config, event_number: int
     ) -> None:
-        """
-        Writes a simulated point cloud to the point cloud file.
+        """Writes a simulated point cloud to the point cloud file.
 
         Parameters
         ----------
-        data: np.ndarray
+        data: numpy.ndarray
             An Nx3 array representing the point cloud. Each row is a point, with elements
             [pad id, time bucket, electrons].
+        labels: numpy.ndarray
+            A length N array containing labels for points in the pointcloud, indicating
+            which nucleus produced the point.
         config: Config
             The simulation configuration.
         event_number: int
@@ -42,15 +44,17 @@ def write(
         pass
 
     def get_directory_name(self) -> Path:  # type: ignore
-        """
-        Returns directory that point cloud files are written to.
+        """Returns directory that point cloud files are written to.
+
+        Returns
+        -------
+        pathlib.Path
+            The path the output directory
         """
         pass
 
     def close(self) -> None:
-        """
-        Closes the writer.
-        """
+        """Closes the writer."""
         pass
 
 
@@ -63,10 +67,8 @@ def convert_to_spyral(
     response: np.ndarray,
     pad_centers: np.ndarray,
     pad_sizes: np.ndarray,
-    adc_threshold: int,
 ) -> np.ndarray:
-    """
-    Converts a simulated point in the point cloud to Spyral formatting.
+    """Converts a simulated point in the point cloud to Spyral formatting.
 
     Parameters
     ----------
@@ -111,13 +113,14 @@ def convert_to_spyral(
 
 
 class SpyralWriter:
-    """
-    Writer for default Spyral analysis. Writes the simulated data into multiple
-    files to take advantage of Spyral's multiprocessing.
+    """Writer for default Spyral analysis.
+
+    Writes the simulated data into multiple files to take advantage of
+    Spyral's multiprocessing.
 
     Parameters
     ----------
-    directory_path: Path
+    directory_path: pathlib.Path
         Path to directory to store simulated point cloud files.
     config: Config
         The simulation configuration.
@@ -132,7 +135,7 @@ class SpyralWriter:
     ----------
     directory_path: pathlib.Path
         The path to the directory data will be written to
-    response: np.ndarray
+    response: numpy.ndarray
         Response of GET electronics.
     max_events_per_file: int
         The maximum number of events per file
@@ -142,19 +145,19 @@ class SpyralWriter:
         The first event number of the file currently being written to
     events_written: int
         The number of events that have been written
-    file: h5.File
+    file: h5py.File
         h5 file object. It is the actual point cloud file currently
         being written to.
-    cloud_group: h5.Group
+    cloud_group: h5py.Group
         "cloud" group in current point cloud file.
 
     Methods
     -------
-    write(data: np.ndarray, config: Config, event_number: int) -> None
+    write(data, config, event_number)
         Writes a simulated point cloud to the point cloud file.
-    get_filename() -> Path
+    get_directory_name()
         Returns directory that point cloud files are written to.
-    close() -> None
+    close()
         Closes the writer with metadata written
     """
 
@@ -163,7 +166,7 @@ def __init__(
         directory_path: Path,
         config: Config,
         max_events_per_file: int = 5_000,
-        first_run_number=0,
+        first_run_number: int = 0,
     ):
         self.directory_path: Path = directory_path
         self.response: np.ndarray = get_response(config).copy()
@@ -195,9 +198,12 @@ def write(
 
         Parameters
         ----------
-        data: np.ndarray
+        data: numpy.ndarray
             An Nx3 array representing the point cloud. Each row is a point, with elements
             [pad id, time bucket, electrons].
+        labels: numpy.ndarray
+            A length N array of labels indicating which nucleus produced the point in
+            the point cloud.
         config: Config
             The simulation configuration.
         event_number: int
@@ -220,7 +226,6 @@ def write(
             self.response,
             config.pad_centers,
             config.pad_sizes,
-            config.elec_params.adc_threshold,
         )
         # apply ADC threshold
         mask = spyral_format[:, 3] > config.elec_params.adc_threshold