From 7b2d5a8df2b266a627bdd39d5d7a5cf1dc0731d9 Mon Sep 17 00:00:00 2001
From: Connor Johnstone <connor.johnstone@arcfield.com>
Date: Fri, 24 Oct 2025 12:26:11 -0400
Subject: [PATCH] Worked out lazy interp

---
 src/integrator/mod.rs   |  40 ++++++
 src/integrator/vern7.rs | 300 ++++++++++++++++++++++++++++++++++++++--
 src/problem.rs          |  57 ++++++--
 3 files changed, 377 insertions(+), 20 deletions(-)
diff --git a/src/integrator/mod.rs b/src/integrator/mod.rs
index 71a1b82..c53f5a5 100644
--- a/src/integrator/mod.rs
+++ b/src/integrator/mod.rs
@@ -13,6 +13,16 @@ pub trait Integrator<const D: usize> {
     const STAGES: usize;
     const ADAPTIVE: bool;
     const DENSE: bool;
+
+    /// Number of main stages stored in dense output (default: same as STAGES)
+    const MAIN_STAGES: usize = Self::STAGES;
+
+    /// Number of extra stages for full-order dense output (default: 0, no extra stages)
+    const EXTRA_STAGES: usize = 0;
+
+    /// Total stages when full dense output is computed
+    const TOTAL_DENSE_STAGES: usize = Self::MAIN_STAGES + Self::EXTRA_STAGES;
+
     /// Returns a new y value, then possibly an error value, and possibly a dense output
     /// coefficient set
     fn step<P>(
@@ -20,6 +30,7 @@ pub trait Integrator<const D: usize> {
         ode: &ODE<D, P>,
         h: f64,
     ) -> (SVector<f64, D>, Option<f64>, Option<Vec<SVector<f64, D>>>);
+
     fn interpolate(
         &self,
         t_start: f64,
@@ -27,6 +38,35 @@ pub trait Integrator<const D: usize> {
         dense: &[SVector<f64, D>],
         t: f64,
     ) -> SVector<f64, D>;
+
+    /// Compute extra stages for full-order dense output (lazy computation).
+    ///
+    /// Most integrators don't need this and return an empty vector by default.
+    /// High-order methods like Vern7 override this to compute additional stages
+    /// needed for full-order interpolation accuracy.
+    ///
+    /// # Arguments
+    ///
+    /// * `ode` - The ODE problem (provides derivative function)
+    /// * `t_start` - Start time of the integration step
+    /// * `y_start` - State at the start of the step
+    /// * `h` - Step size
+    /// * `main_stages` - The main k-stages from step()
+    ///
+    /// # Returns
+    ///
+    /// Vector of extra k-stages (empty for most integrators)
+    fn compute_extra_stages<P>(
+        &self,
+        _ode: &ODE<D, P>,
+        _t_start: f64,
+        _y_start: SVector<f64, D>,
+        _h: f64,
+        _main_stages: &[SVector<f64, D>],
+    ) -> Vec<SVector<f64, D>> {
+        // Default implementation: no extra stages needed
+        Vec::new()
+    }
 }
 
 #[cfg(test)]
diff --git a/src/integrator/vern7.rs b/src/integrator/vern7.rs
index bd4a8eb..cba482a 100644
--- a/src/integrator/vern7.rs
+++ b/src/integrator/vern7.rs
@@ -12,6 +12,16 @@ pub trait Vern7Integrator<'a> {
     const R: &'a [f64]; // Interpolation coefficients
 }
 
+/// Verner 7 extra stages trait for lazy dense output
+///
+/// These coefficients define the 6 additional Runge-Kutta stages (k11-k16)
+/// needed for full 7th order dense output interpolation. They are computed
+/// lazily only when interpolation is requested.
+pub trait Vern7ExtraStages<'a> {
+    const C_EXTRA: &'a [f64]; // Time nodes for extra stages (c11-c16)
+    const A_EXTRA: &'a [f64]; // A-matrix entries for extra stages (flattened)
+}
+
 /// Verner's "Most Efficient" 7(6) method
 ///
 /// A 7th order explicit Runge-Kutta method with an embedded 6th order method for
@@ -171,15 +181,103 @@ impl<'a, const D: usize> Vern7Integrator<'a> for Vern7<D> {
     const R: &'a [f64] = &[];
 }
 
+impl<'a, const D: usize> Vern7ExtraStages<'a> for Vern7<D> {
+    // Time nodes for extra stages
+    const C_EXTRA: &'a [f64] = &[
+        1.0,    // c11
+        0.29,   // c12
+        0.125,  // c13
+        0.25,   // c14
+        0.53,   // c15
+        0.79,   // c16
+    ];
+
+    // A-matrix coefficients for extra stages (flattened)
+    // Each stage uses only k1, k4-k9 from main stages, plus previously computed extra stages
+    //
+    // Stage 11: uses k1, k4, k5, k6, k7, k8, k9
+    // Stage 12: uses k1, k4, k5, k6, k7, k8, k9, k11
+    // Stage 13: uses k1, k4, k5, k6, k7, k8, k9, k11, k12
+    // Stage 14: uses k1, k4, k5, k6, k7, k8, k9, k11, k12, k13
+    // Stage 15: uses k1, k4, k5, k6, k7, k8, k9, k11, k12, k13
+    // Stage 16: uses k1, k4, k5, k6, k7, k8, k9, k11, k12, k13
+    const A_EXTRA: &'a [f64] = &[
+        // Stage 11 (7 coefficients): a1101, a1104, a1105, a1106, a1107, a1108, a1109
+        0.04715561848627222,
+        0.25750564298434153,
+        0.2621665397741262,
+        0.15216092656738558,
+        0.49399691700324844,
+        -0.29430311714032503,
+        0.0813174723249511,
+        // Stage 12 (8 coefficients): a1201, a1204, a1205, a1206, a1207, a1208, a1209, a1211
+        0.0523222769159969,
+        0.22495861826705715,
+        0.017443709248776376,
+        -0.007669379876829393,
+        0.03435896044073285,
+        -0.0410209723009395,
+        0.025651133005205617,
+        -0.0160443457,
+        // Stage 13 (9 coefficients): a1301, a1304, a1305, a1306, a1307, a1308, a1309, a1311, a1312
+        0.053053341257859085,
+        0.12195301011401886,
+        0.017746840737602496,
+        -0.0005928372667681495,
+        0.008381833970853752,
+        -0.01293369259698612,
+        0.009412056815253861,
+        -0.005353253107275676,
+        -0.06666729992455811,
+        // Stage 14 (10 coefficients): a1401, a1404, a1405, a1406, a1407, a1408, a1409, a1411, a1412, a1413
+        0.03887903257436304,
+        -0.0024403203308301317,
+        -0.0013928917214672623,
+        -0.00047446291558680135,
+        0.00039207932413159514,
+        -0.00040554733285128004,
+        0.00019897093147716726,
+        -0.00010278198793179169,
+        0.03385661513870267,
+        0.1814893063199928,
+        // Stage 15 (10 coefficients): a1501, a1504, a1505, a1506, a1507, a1508, a1509, a1511, a1512, a1513
+        0.05723681204690013,
+        0.22265948066761182,
+        0.12344864200186899,
+        0.04006332526666491,
+        -0.05269894848581452,
+        0.04765971214244523,
+        -0.02138895885042213,
+        0.015193891064036402,
+        0.12060546716289655,
+        -0.022779423016187374,
+        // Stage 16 (10 coefficients): a1601, a1604, a1605, a1606, a1607, a1608, a1609, a1611, a1612, a1613
+        0.051372038802756814,
+        0.5414214473439406,
+        0.350399806692184,
+        0.14193112269692182,
+        0.10527377478429423,
+        -0.031081847805874016,
+        -0.007401883149519145,
+        -0.006377932504865363,
+        -0.17325495908361865,
+        -0.18228156777622026,
+    ];
+}
+
 impl<'a, const D: usize> Integrator<D> for Vern7<D>
 where
-    Vern7<D>: Vern7Integrator<'a>,
+    Vern7<D>: Vern7Integrator<'a> + Vern7ExtraStages<'a>,
 {
     const ORDER: usize = 7;
     const STAGES: usize = 10;
     const ADAPTIVE: bool = true;
     const DENSE: bool = true;
 
+    // Lazy dense output configuration
+    const MAIN_STAGES: usize = 10;
+    const EXTRA_STAGES: usize = 6;
+
     fn step<P>(
         &self,
         ode: &ODE<D, P>,
@@ -235,8 +333,10 @@ where
         t: f64,
     ) -> SVector<f64, D> {
         // Vern7 uses 7th order polynomial interpolation
-        // The full implementation would compute 6 extra stages (k11-k16) lazily
-        // For now, we use a simplified version with just the main 10 stages
+        // Check if extra stages (k11-k16) are available
+        // Dense array format: [y0, y1, k1, k2, ..., k10, k11, ..., k16]
+        // With main stages only: length = 2 + 10 = 12
+        // With all stages: length = 2 + 10 + 6 = 18
 
         let theta = (t - t_start) / (t_end - t_start);
         let theta2 = theta * theta;
@@ -256,10 +356,6 @@ where
         let k9 = &dense[10]; // k9
         // k10 is at dense[11] but not used in interpolation
 
-        // Interpolation polynomial coefficients (from Julia source)
-        // b1Θ = Θ * evalpoly(Θ, [r011, r012, ..., r017])
-        // b4Θ through b9Θ = Θ² * evalpoly(Θ, [coeffs])
-
         // Helper to evaluate polynomial using Horner's method
         #[inline]
         fn evalpoly(x: f64, coeffs: &[f64]) -> f64 {
@@ -336,15 +432,197 @@ where
             -52.85010499525706,
         ]);
 
-        // Compute interpolated value
-        // Full implementation would also include k11-k16 with their own polynomials
-        y0 + h * (k1 * b1_theta +
+        // Compute base interpolation with main stages
+        let mut result = y0 + h * (k1 * b1_theta +
                   k4 * b4_theta +
                   k5 * b5_theta +
                   k6 * b6_theta +
                   k7 * b7_theta +
                   k8 * b8_theta +
-                  k9 * b9_theta)
+                  k9 * b9_theta);
+
+        // If extra stages are available, add their contribution for full 7th order accuracy
+        if dense.len() >= 2 + Self::TOTAL_DENSE_STAGES {
+            // Extra stages are at indices 12-17
+            let k11 = &dense[12];
+            let k12 = &dense[13];
+            let k13 = &dense[14];
+            let k14 = &dense[15];
+            let k15 = &dense[16];
+            let k16 = &dense[17];
+
+            // Stages 11-16: all start at degree 2
+            let b11_theta = theta2 * evalpoly(theta, &[
+                -2.1696320280163506,
+                22.016696037569876,
+                -86.90152427798948,
+                159.22388973861476,
+                -135.9618306534588,
+                43.792401183280006,
+            ]);
+
+            let b12_theta = theta2 * evalpoly(theta, &[
+                -4.890070188793804,
+                22.75407737425176,
+                -30.78034218537731,
+                -2.797194317207249,
+                31.369456637508403,
+                -15.655927320381801,
+            ]);
+
+            let b13_theta = theta2 * evalpoly(theta, &[
+                10.862170929551967,
+                -50.542971417827104,
+                68.37148040407511,
+                6.213326521632409,
+                -69.68006323194157,
+                34.776056794509195,
+            ]);
+
+            let b14_theta = theta2 * evalpoly(theta, &[
+                -11.37286691922923,
+                130.79058078246717,
+                -488.65113677785604,
+                832.2148793276441,
+                -664.7743368554426,
+                201.79288044241662,
+            ]);
+
+            let b15_theta = theta2 * evalpoly(theta, &[
+                -5.919778732715007,
+                63.27679965889219,
+                -265.432682088738,
+                520.1009254140611,
+                -467.412109533902,
+                155.3868452824017,
+            ]);
+
+            let b16_theta = theta2 * evalpoly(theta, &[
+                -10.492146197961823,
+                105.35538525188011,
+                -409.43975011988937,
+                732.831448907654,
+                -606.3044574733512,
+                188.0495196316683,
+            ]);
+
+            // Add contribution from extra stages
+            result += h * (k11 * b11_theta +
+                          k12 * b12_theta +
+                          k13 * b13_theta +
+                          k14 * b14_theta +
+                          k15 * b15_theta +
+                          k16 * b16_theta);
+        }
+
+        result
+    }
+
+    fn compute_extra_stages<P>(
+        &self,
+        ode: &ODE<D, P>,
+        t_start: f64,
+        y_start: SVector<f64, D>,
+        h: f64,
+        main_stages: &[SVector<f64, D>],
+    ) -> Vec<SVector<f64, D>> {
+        // Extract main stages that are used in extra stage computation
+        // From Julia: extra stages use k1, k4, k5, k6, k7, k8, k9
+        let k1 = &main_stages[0];
+        let k4 = &main_stages[3];
+        let k5 = &main_stages[4];
+        let k6 = &main_stages[5];
+        let k7 = &main_stages[6];
+        let k8 = &main_stages[7];
+        let k9 = &main_stages[8];
+
+        let mut extra_stages = Vec::with_capacity(Self::EXTRA_STAGES);
+
+        // Stage 11: uses k1, k4-k9 (7 coefficients)
+        let mut y11 = y_start;
+        y11 += k1 * Self::A_EXTRA[0] * h;
+        y11 += k4 * Self::A_EXTRA[1] * h;
+        y11 += k5 * Self::A_EXTRA[2] * h;
+        y11 += k6 * Self::A_EXTRA[3] * h;
+        y11 += k7 * Self::A_EXTRA[4] * h;
+        y11 += k8 * Self::A_EXTRA[5] * h;
+        y11 += k9 * Self::A_EXTRA[6] * h;
+        let k11 = (ode.f)(t_start + Self::C_EXTRA[0] * h, y11, &ode.params);
+        extra_stages.push(k11);
+
+        // Stage 12: uses k1, k4-k9, k11 (8 coefficients)
+        let mut y12 = y_start;
+        y12 += k1 * Self::A_EXTRA[7] * h;
+        y12 += k4 * Self::A_EXTRA[8] * h;
+        y12 += k5 * Self::A_EXTRA[9] * h;
+        y12 += k6 * Self::A_EXTRA[10] * h;
+        y12 += k7 * Self::A_EXTRA[11] * h;
+        y12 += k8 * Self::A_EXTRA[12] * h;
+        y12 += k9 * Self::A_EXTRA[13] * h;
+        y12 += &extra_stages[0] * Self::A_EXTRA[14] * h; // k11
+        let k12 = (ode.f)(t_start + Self::C_EXTRA[1] * h, y12, &ode.params);
+        extra_stages.push(k12);
+
+        // Stage 13: uses k1, k4-k9, k11, k12 (9 coefficients)
+        let mut y13 = y_start;
+        y13 += k1 * Self::A_EXTRA[15] * h;
+        y13 += k4 * Self::A_EXTRA[16] * h;
+        y13 += k5 * Self::A_EXTRA[17] * h;
+        y13 += k6 * Self::A_EXTRA[18] * h;
+        y13 += k7 * Self::A_EXTRA[19] * h;
+        y13 += k8 * Self::A_EXTRA[20] * h;
+        y13 += k9 * Self::A_EXTRA[21] * h;
+        y13 += &extra_stages[0] * Self::A_EXTRA[22] * h; // k11
+        y13 += &extra_stages[1] * Self::A_EXTRA[23] * h; // k12
+        let k13 = (ode.f)(t_start + Self::C_EXTRA[2] * h, y13, &ode.params);
+        extra_stages.push(k13);
+
+        // Stage 14: uses k1, k4-k9, k11, k12, k13 (10 coefficients)
+        let mut y14 = y_start;
+        y14 += k1 * Self::A_EXTRA[24] * h;
+        y14 += k4 * Self::A_EXTRA[25] * h;
+        y14 += k5 * Self::A_EXTRA[26] * h;
+        y14 += k6 * Self::A_EXTRA[27] * h;
+        y14 += k7 * Self::A_EXTRA[28] * h;
+        y14 += k8 * Self::A_EXTRA[29] * h;
+        y14 += k9 * Self::A_EXTRA[30] * h;
+        y14 += &extra_stages[0] * Self::A_EXTRA[31] * h; // k11
+        y14 += &extra_stages[1] * Self::A_EXTRA[32] * h; // k12
+        y14 += &extra_stages[2] * Self::A_EXTRA[33] * h; // k13
+        let k14 = (ode.f)(t_start + Self::C_EXTRA[3] * h, y14, &ode.params);
+        extra_stages.push(k14);
+
+        // Stage 15: uses k1, k4-k9, k11, k12, k13 (10 coefficients, reuses k13 not k14)
+        let mut y15 = y_start;
+        y15 += k1 * Self::A_EXTRA[34] * h;
+        y15 += k4 * Self::A_EXTRA[35] * h;
+        y15 += k5 * Self::A_EXTRA[36] * h;
+        y15 += k6 * Self::A_EXTRA[37] * h;
+        y15 += k7 * Self::A_EXTRA[38] * h;
+        y15 += k8 * Self::A_EXTRA[39] * h;
+        y15 += k9 * Self::A_EXTRA[40] * h;
+        y15 += &extra_stages[0] * Self::A_EXTRA[41] * h; // k11
+        y15 += &extra_stages[1] * Self::A_EXTRA[42] * h; // k12
+        y15 += &extra_stages[2] * Self::A_EXTRA[43] * h; // k13
+        let k15 = (ode.f)(t_start + Self::C_EXTRA[4] * h, y15, &ode.params);
+        extra_stages.push(k15);
+
+        // Stage 16: uses k1, k4-k9, k11, k12, k13 (10 coefficients, reuses k13 not k14 or k15)
+        let mut y16 = y_start;
+        y16 += k1 * Self::A_EXTRA[44] * h;
+        y16 += k4 * Self::A_EXTRA[45] * h;
+        y16 += k5 * Self::A_EXTRA[46] * h;
+        y16 += k6 * Self::A_EXTRA[47] * h;
+        y16 += k7 * Self::A_EXTRA[48] * h;
+        y16 += k8 * Self::A_EXTRA[49] * h;
+        y16 += k9 * Self::A_EXTRA[50] * h;
+        y16 += &extra_stages[0] * Self::A_EXTRA[51] * h; // k11
+        y16 += &extra_stages[1] * Self::A_EXTRA[52] * h; // k12
+        y16 += &extra_stages[2] * Self::A_EXTRA[53] * h; // k13
+        let k16 = (ode.f)(t_start + Self::C_EXTRA[5] * h, y16, &ode.params);
+        extra_stages.push(k16);
+
+        extra_stages
     }
 }
 
diff --git a/src/problem.rs b/src/problem.rs
index b3f2cb6..eccedcf 100644
--- a/src/problem.rs
+++ b/src/problem.rs
@@ -1,5 +1,6 @@
 use nalgebra::SVector;
 use roots::{find_root_brent, SimpleConvergency};
+use std::cell::RefCell;
 
 use super::callback::Callback;
 use super::controller::{Controller, PIController, TryStep};
@@ -29,14 +30,14 @@ where
             callbacks: Vec::new(),
         }
     }
-    pub fn solve(&mut self) -> Solution<S, D> {
+    pub fn solve(&mut self) -> Solution<'_, S, D, P> {
         let mut convergency = SimpleConvergency {
             eps: 1e-12,
             max_iter: 1000,
         };
         let mut times: Vec<f64> = vec![self.ode.t];
         let mut states: Vec<SVector<f64, D>> = vec![self.ode.y];
-        let mut dense_coefficients: Vec<Vec<SVector<f64, D>>> = Vec::new();
+        let mut dense_coefficients: Vec<RefCell<Vec<SVector<f64, D>>>> = Vec::new();
         while self.ode.t < self.ode.t_end {
             if self.ode.t + self.controller.next_step_guess.extract() > self.ode.t_end {
                 // If the next step would go past the end, then just set it to the end
@@ -100,9 +101,10 @@ where
             times.push(self.ode.t);
             states.push(self.ode.y);
             // TODO: Implement third order interpolation for non-dense algorithms
-            dense_coefficients.push(dense_option.unwrap());
+            dense_coefficients.push(RefCell::new(dense_option.unwrap()));
         }
         Solution {
+            ode: &self.ode,
             integrator: self.integrator,
             times,
             states,
@@ -121,17 +123,18 @@ where
     }
 }
 
-pub struct Solution<S, const D: usize>
+pub struct Solution<'a, S, const D: usize, P>
 where
     S: Integrator<D>,
 {
+    pub ode: &'a ODE<'a, D, P>,
     pub integrator: S,
     pub times: Vec<f64>,
     pub states: Vec<SVector<f64, D>>,
-    pub dense: Vec<Vec<SVector<f64, D>>>,
+    pub dense: Vec<RefCell<Vec<SVector<f64, D>>>>,
 }
 
-impl<S, const D: usize> Solution<S, D>
+impl<'a, S, const D: usize, P> Solution<'a, S, D, P>
 where
     S: Integrator<D>,
 {
@@ -153,11 +156,47 @@ where
         match times.binary_search_by(|x| x.total_cmp(&t)) {
             Ok(index) => self.states[index],
             Err(end_index) => {
-                // Then send that to the integrator
                 let t_start = times[end_index - 1];
                 let t_end = times[end_index];
-                self.integrator
-                    .interpolate(t_start, t_end, &self.dense[end_index - 1], t)
+                let y_start = self.states[end_index - 1];
+                let h = t_end - t_start;
+
+                // Check if we need to compute extra stages for lazy dense output
+                let dense_cell = &self.dense[end_index - 1];
+
+                if S::EXTRA_STAGES > 0 {
+                    let needs_extra = {
+                        let borrowed = dense_cell.borrow();
+                        // Dense array format: [y0, y1, k1, k2, ..., k_main]
+                        // If we have main stages only: 2 + MAIN_STAGES elements
+                        // If we have all stages: 2 + MAIN_STAGES + EXTRA_STAGES elements
+                        borrowed.len() < 2 + S::TOTAL_DENSE_STAGES
+                    };
+
+                    if needs_extra {
+                        // Compute extra stages and append to dense output
+                        let mut dense = dense_cell.borrow_mut();
+
+                        // Extract main stages (skip y0 and y1 at indices 0 and 1)
+                        let main_stages = &dense[2..2 + S::MAIN_STAGES];
+
+                        // Compute extra stages lazily
+                        let extra_stages = self.integrator.compute_extra_stages(
+                            self.ode,
+                            t_start,
+                            y_start,
+                            h,
+                            main_stages,
+                        );
+
+                        // Append extra stages to dense output (cached for future interpolations)
+                        dense.extend(extra_stages);
+                    }
+                }
+
+                // Now interpolate with the (possibly augmented) dense output
+                let dense = dense_cell.borrow();
+                self.integrator.interpolate(t_start, t_end, &dense, t)
             }
         }
     }