From 7b2d5a8df2b266a627bdd39d5d7a5cf1dc0731d9 Mon Sep 17 00:00:00 2001 From: Connor Johnstone Date: Fri, 24 Oct 2025 12:26:11 -0400 Subject: [PATCH] Worked out lazy interp --- src/integrator/mod.rs | 40 ++++++ src/integrator/vern7.rs | 300 ++++++++++++++++++++++++++++++++++++++-- src/problem.rs | 57 ++++++-- 3 files changed, 377 insertions(+), 20 deletions(-) diff --git a/src/integrator/mod.rs b/src/integrator/mod.rs index 71a1b82..c53f5a5 100644 --- a/src/integrator/mod.rs +++ b/src/integrator/mod.rs @@ -13,6 +13,16 @@ pub trait Integrator { const STAGES: usize; const ADAPTIVE: bool; const DENSE: bool; + + /// Number of main stages stored in dense output (default: same as STAGES) + const MAIN_STAGES: usize = Self::STAGES; + + /// Number of extra stages for full-order dense output (default: 0, no extra stages) + const EXTRA_STAGES: usize = 0; + + /// Total stages when full dense output is computed + const TOTAL_DENSE_STAGES: usize = Self::MAIN_STAGES + Self::EXTRA_STAGES; + /// Returns a new y value, then possibly an error value, and possibly a dense output /// coefficient set fn step

( @@ -20,6 +30,7 @@ pub trait Integrator { ode: &ODE, h: f64, ) -> (SVector, Option, Option>>); + fn interpolate( &self, t_start: f64, @@ -27,6 +38,35 @@ pub trait Integrator { dense: &[SVector], t: f64, ) -> SVector; + + /// Compute extra stages for full-order dense output (lazy computation). + /// + /// Most integrators don't need this and return an empty vector by default. + /// High-order methods like Vern7 override this to compute additional stages + /// needed for full-order interpolation accuracy. + /// + /// # Arguments + /// + /// * `ode` - The ODE problem (provides derivative function) + /// * `t_start` - Start time of the integration step + /// * `y_start` - State at the start of the step + /// * `h` - Step size + /// * `main_stages` - The main k-stages from step() + /// + /// # Returns + /// + /// Vector of extra k-stages (empty for most integrators) + fn compute_extra_stages

( + &self, + _ode: &ODE, + _t_start: f64, + _y_start: SVector, + _h: f64, + _main_stages: &[SVector], + ) -> Vec> { + // Default implementation: no extra stages needed + Vec::new() + } } #[cfg(test)] diff --git a/src/integrator/vern7.rs b/src/integrator/vern7.rs index bd4a8eb..cba482a 100644 --- a/src/integrator/vern7.rs +++ b/src/integrator/vern7.rs @@ -12,6 +12,16 @@ pub trait Vern7Integrator<'a> { const R: &'a [f64]; // Interpolation coefficients } +/// Verner 7 extra stages trait for lazy dense output +/// +/// These coefficients define the 6 additional Runge-Kutta stages (k11-k16) +/// needed for full 7th order dense output interpolation. They are computed +/// lazily only when interpolation is requested. +pub trait Vern7ExtraStages<'a> { + const C_EXTRA: &'a [f64]; // Time nodes for extra stages (c11-c16) + const A_EXTRA: &'a [f64]; // A-matrix entries for extra stages (flattened) +} + /// Verner's "Most Efficient" 7(6) method /// /// A 7th order explicit Runge-Kutta method with an embedded 6th order method for @@ -171,15 +181,103 @@ impl<'a, const D: usize> Vern7Integrator<'a> for Vern7 { const R: &'a [f64] = &[]; } +impl<'a, const D: usize> Vern7ExtraStages<'a> for Vern7 { + // Time nodes for extra stages + const C_EXTRA: &'a [f64] = &[ + 1.0, // c11 + 0.29, // c12 + 0.125, // c13 + 0.25, // c14 + 0.53, // c15 + 0.79, // c16 + ]; + + // A-matrix coefficients for extra stages (flattened) + // Each stage uses only k1, k4-k9 from main stages, plus previously computed extra stages + // + // Stage 11: uses k1, k4, k5, k6, k7, k8, k9 + // Stage 12: uses k1, k4, k5, k6, k7, k8, k9, k11 + // Stage 13: uses k1, k4, k5, k6, k7, k8, k9, k11, k12 + // Stage 14: uses k1, k4, k5, k6, k7, k8, k9, k11, k12, k13 + // Stage 15: uses k1, k4, k5, k6, k7, k8, k9, k11, k12, k13 + // Stage 16: uses k1, k4, k5, k6, k7, k8, k9, k11, k12, k13 + const A_EXTRA: &'a [f64] = &[ + // Stage 11 (7 coefficients): a1101, a1104, a1105, a1106, a1107, a1108, a1109 + 0.04715561848627222, + 0.25750564298434153, + 0.2621665397741262, + 0.15216092656738558, + 0.49399691700324844, + -0.29430311714032503, + 0.0813174723249511, + // Stage 12 (8 coefficients): a1201, a1204, a1205, a1206, a1207, a1208, a1209, a1211 + 0.0523222769159969, + 0.22495861826705715, + 0.017443709248776376, + -0.007669379876829393, + 0.03435896044073285, + -0.0410209723009395, + 0.025651133005205617, + -0.0160443457, + // Stage 13 (9 coefficients): a1301, a1304, a1305, a1306, a1307, a1308, a1309, a1311, a1312 + 0.053053341257859085, + 0.12195301011401886, + 0.017746840737602496, + -0.0005928372667681495, + 0.008381833970853752, + -0.01293369259698612, + 0.009412056815253861, + -0.005353253107275676, + -0.06666729992455811, + // Stage 14 (10 coefficients): a1401, a1404, a1405, a1406, a1407, a1408, a1409, a1411, a1412, a1413 + 0.03887903257436304, + -0.0024403203308301317, + -0.0013928917214672623, + -0.00047446291558680135, + 0.00039207932413159514, + -0.00040554733285128004, + 0.00019897093147716726, + -0.00010278198793179169, + 0.03385661513870267, + 0.1814893063199928, + // Stage 15 (10 coefficients): a1501, a1504, a1505, a1506, a1507, a1508, a1509, a1511, a1512, a1513 + 0.05723681204690013, + 0.22265948066761182, + 0.12344864200186899, + 0.04006332526666491, + -0.05269894848581452, + 0.04765971214244523, + -0.02138895885042213, + 0.015193891064036402, + 0.12060546716289655, + -0.022779423016187374, + // Stage 16 (10 coefficients): a1601, a1604, a1605, a1606, a1607, a1608, a1609, a1611, a1612, a1613 + 0.051372038802756814, + 0.5414214473439406, + 0.350399806692184, + 0.14193112269692182, + 0.10527377478429423, + -0.031081847805874016, + -0.007401883149519145, + -0.006377932504865363, + -0.17325495908361865, + -0.18228156777622026, + ]; +} + impl<'a, const D: usize> Integrator for Vern7 where - Vern7: Vern7Integrator<'a>, + Vern7: Vern7Integrator<'a> + Vern7ExtraStages<'a>, { const ORDER: usize = 7; const STAGES: usize = 10; const ADAPTIVE: bool = true; const DENSE: bool = true; + // Lazy dense output configuration + const MAIN_STAGES: usize = 10; + const EXTRA_STAGES: usize = 6; + fn step

( &self, ode: &ODE, @@ -235,8 +333,10 @@ where t: f64, ) -> SVector { // Vern7 uses 7th order polynomial interpolation - // The full implementation would compute 6 extra stages (k11-k16) lazily - // For now, we use a simplified version with just the main 10 stages + // Check if extra stages (k11-k16) are available + // Dense array format: [y0, y1, k1, k2, ..., k10, k11, ..., k16] + // With main stages only: length = 2 + 10 = 12 + // With all stages: length = 2 + 10 + 6 = 18 let theta = (t - t_start) / (t_end - t_start); let theta2 = theta * theta; @@ -256,10 +356,6 @@ where let k9 = &dense[10]; // k9 // k10 is at dense[11] but not used in interpolation - // Interpolation polynomial coefficients (from Julia source) - // b1Θ = Θ * evalpoly(Θ, [r011, r012, ..., r017]) - // b4Θ through b9Θ = Θ² * evalpoly(Θ, [coeffs]) - // Helper to evaluate polynomial using Horner's method #[inline] fn evalpoly(x: f64, coeffs: &[f64]) -> f64 { @@ -336,15 +432,197 @@ where -52.85010499525706, ]); - // Compute interpolated value - // Full implementation would also include k11-k16 with their own polynomials - y0 + h * (k1 * b1_theta + + // Compute base interpolation with main stages + let mut result = y0 + h * (k1 * b1_theta + k4 * b4_theta + k5 * b5_theta + k6 * b6_theta + k7 * b7_theta + k8 * b8_theta + - k9 * b9_theta) + k9 * b9_theta); + + // If extra stages are available, add their contribution for full 7th order accuracy + if dense.len() >= 2 + Self::TOTAL_DENSE_STAGES { + // Extra stages are at indices 12-17 + let k11 = &dense[12]; + let k12 = &dense[13]; + let k13 = &dense[14]; + let k14 = &dense[15]; + let k15 = &dense[16]; + let k16 = &dense[17]; + + // Stages 11-16: all start at degree 2 + let b11_theta = theta2 * evalpoly(theta, &[ + -2.1696320280163506, + 22.016696037569876, + -86.90152427798948, + 159.22388973861476, + -135.9618306534588, + 43.792401183280006, + ]); + + let b12_theta = theta2 * evalpoly(theta, &[ + -4.890070188793804, + 22.75407737425176, + -30.78034218537731, + -2.797194317207249, + 31.369456637508403, + -15.655927320381801, + ]); + + let b13_theta = theta2 * evalpoly(theta, &[ + 10.862170929551967, + -50.542971417827104, + 68.37148040407511, + 6.213326521632409, + -69.68006323194157, + 34.776056794509195, + ]); + + let b14_theta = theta2 * evalpoly(theta, &[ + -11.37286691922923, + 130.79058078246717, + -488.65113677785604, + 832.2148793276441, + -664.7743368554426, + 201.79288044241662, + ]); + + let b15_theta = theta2 * evalpoly(theta, &[ + -5.919778732715007, + 63.27679965889219, + -265.432682088738, + 520.1009254140611, + -467.412109533902, + 155.3868452824017, + ]); + + let b16_theta = theta2 * evalpoly(theta, &[ + -10.492146197961823, + 105.35538525188011, + -409.43975011988937, + 732.831448907654, + -606.3044574733512, + 188.0495196316683, + ]); + + // Add contribution from extra stages + result += h * (k11 * b11_theta + + k12 * b12_theta + + k13 * b13_theta + + k14 * b14_theta + + k15 * b15_theta + + k16 * b16_theta); + } + + result + } + + fn compute_extra_stages

( + &self, + ode: &ODE, + t_start: f64, + y_start: SVector, + h: f64, + main_stages: &[SVector], + ) -> Vec> { + // Extract main stages that are used in extra stage computation + // From Julia: extra stages use k1, k4, k5, k6, k7, k8, k9 + let k1 = &main_stages[0]; + let k4 = &main_stages[3]; + let k5 = &main_stages[4]; + let k6 = &main_stages[5]; + let k7 = &main_stages[6]; + let k8 = &main_stages[7]; + let k9 = &main_stages[8]; + + let mut extra_stages = Vec::with_capacity(Self::EXTRA_STAGES); + + // Stage 11: uses k1, k4-k9 (7 coefficients) + let mut y11 = y_start; + y11 += k1 * Self::A_EXTRA[0] * h; + y11 += k4 * Self::A_EXTRA[1] * h; + y11 += k5 * Self::A_EXTRA[2] * h; + y11 += k6 * Self::A_EXTRA[3] * h; + y11 += k7 * Self::A_EXTRA[4] * h; + y11 += k8 * Self::A_EXTRA[5] * h; + y11 += k9 * Self::A_EXTRA[6] * h; + let k11 = (ode.f)(t_start + Self::C_EXTRA[0] * h, y11, &ode.params); + extra_stages.push(k11); + + // Stage 12: uses k1, k4-k9, k11 (8 coefficients) + let mut y12 = y_start; + y12 += k1 * Self::A_EXTRA[7] * h; + y12 += k4 * Self::A_EXTRA[8] * h; + y12 += k5 * Self::A_EXTRA[9] * h; + y12 += k6 * Self::A_EXTRA[10] * h; + y12 += k7 * Self::A_EXTRA[11] * h; + y12 += k8 * Self::A_EXTRA[12] * h; + y12 += k9 * Self::A_EXTRA[13] * h; + y12 += &extra_stages[0] * Self::A_EXTRA[14] * h; // k11 + let k12 = (ode.f)(t_start + Self::C_EXTRA[1] * h, y12, &ode.params); + extra_stages.push(k12); + + // Stage 13: uses k1, k4-k9, k11, k12 (9 coefficients) + let mut y13 = y_start; + y13 += k1 * Self::A_EXTRA[15] * h; + y13 += k4 * Self::A_EXTRA[16] * h; + y13 += k5 * Self::A_EXTRA[17] * h; + y13 += k6 * Self::A_EXTRA[18] * h; + y13 += k7 * Self::A_EXTRA[19] * h; + y13 += k8 * Self::A_EXTRA[20] * h; + y13 += k9 * Self::A_EXTRA[21] * h; + y13 += &extra_stages[0] * Self::A_EXTRA[22] * h; // k11 + y13 += &extra_stages[1] * Self::A_EXTRA[23] * h; // k12 + let k13 = (ode.f)(t_start + Self::C_EXTRA[2] * h, y13, &ode.params); + extra_stages.push(k13); + + // Stage 14: uses k1, k4-k9, k11, k12, k13 (10 coefficients) + let mut y14 = y_start; + y14 += k1 * Self::A_EXTRA[24] * h; + y14 += k4 * Self::A_EXTRA[25] * h; + y14 += k5 * Self::A_EXTRA[26] * h; + y14 += k6 * Self::A_EXTRA[27] * h; + y14 += k7 * Self::A_EXTRA[28] * h; + y14 += k8 * Self::A_EXTRA[29] * h; + y14 += k9 * Self::A_EXTRA[30] * h; + y14 += &extra_stages[0] * Self::A_EXTRA[31] * h; // k11 + y14 += &extra_stages[1] * Self::A_EXTRA[32] * h; // k12 + y14 += &extra_stages[2] * Self::A_EXTRA[33] * h; // k13 + let k14 = (ode.f)(t_start + Self::C_EXTRA[3] * h, y14, &ode.params); + extra_stages.push(k14); + + // Stage 15: uses k1, k4-k9, k11, k12, k13 (10 coefficients, reuses k13 not k14) + let mut y15 = y_start; + y15 += k1 * Self::A_EXTRA[34] * h; + y15 += k4 * Self::A_EXTRA[35] * h; + y15 += k5 * Self::A_EXTRA[36] * h; + y15 += k6 * Self::A_EXTRA[37] * h; + y15 += k7 * Self::A_EXTRA[38] * h; + y15 += k8 * Self::A_EXTRA[39] * h; + y15 += k9 * Self::A_EXTRA[40] * h; + y15 += &extra_stages[0] * Self::A_EXTRA[41] * h; // k11 + y15 += &extra_stages[1] * Self::A_EXTRA[42] * h; // k12 + y15 += &extra_stages[2] * Self::A_EXTRA[43] * h; // k13 + let k15 = (ode.f)(t_start + Self::C_EXTRA[4] * h, y15, &ode.params); + extra_stages.push(k15); + + // Stage 16: uses k1, k4-k9, k11, k12, k13 (10 coefficients, reuses k13 not k14 or k15) + let mut y16 = y_start; + y16 += k1 * Self::A_EXTRA[44] * h; + y16 += k4 * Self::A_EXTRA[45] * h; + y16 += k5 * Self::A_EXTRA[46] * h; + y16 += k6 * Self::A_EXTRA[47] * h; + y16 += k7 * Self::A_EXTRA[48] * h; + y16 += k8 * Self::A_EXTRA[49] * h; + y16 += k9 * Self::A_EXTRA[50] * h; + y16 += &extra_stages[0] * Self::A_EXTRA[51] * h; // k11 + y16 += &extra_stages[1] * Self::A_EXTRA[52] * h; // k12 + y16 += &extra_stages[2] * Self::A_EXTRA[53] * h; // k13 + let k16 = (ode.f)(t_start + Self::C_EXTRA[5] * h, y16, &ode.params); + extra_stages.push(k16); + + extra_stages } } diff --git a/src/problem.rs b/src/problem.rs index b3f2cb6..eccedcf 100644 --- a/src/problem.rs +++ b/src/problem.rs @@ -1,5 +1,6 @@ use nalgebra::SVector; use roots::{find_root_brent, SimpleConvergency}; +use std::cell::RefCell; use super::callback::Callback; use super::controller::{Controller, PIController, TryStep}; @@ -29,14 +30,14 @@ where callbacks: Vec::new(), } } - pub fn solve(&mut self) -> Solution { + pub fn solve(&mut self) -> Solution<'_, S, D, P> { let mut convergency = SimpleConvergency { eps: 1e-12, max_iter: 1000, }; let mut times: Vec = vec![self.ode.t]; let mut states: Vec> = vec![self.ode.y]; - let mut dense_coefficients: Vec>> = Vec::new(); + let mut dense_coefficients: Vec>>> = Vec::new(); while self.ode.t < self.ode.t_end { if self.ode.t + self.controller.next_step_guess.extract() > self.ode.t_end { // If the next step would go past the end, then just set it to the end @@ -100,9 +101,10 @@ where times.push(self.ode.t); states.push(self.ode.y); // TODO: Implement third order interpolation for non-dense algorithms - dense_coefficients.push(dense_option.unwrap()); + dense_coefficients.push(RefCell::new(dense_option.unwrap())); } Solution { + ode: &self.ode, integrator: self.integrator, times, states, @@ -121,17 +123,18 @@ where } } -pub struct Solution +pub struct Solution<'a, S, const D: usize, P> where S: Integrator, { + pub ode: &'a ODE<'a, D, P>, pub integrator: S, pub times: Vec, pub states: Vec>, - pub dense: Vec>>, + pub dense: Vec>>>, } -impl Solution +impl<'a, S, const D: usize, P> Solution<'a, S, D, P> where S: Integrator, { @@ -153,11 +156,47 @@ where match times.binary_search_by(|x| x.total_cmp(&t)) { Ok(index) => self.states[index], Err(end_index) => { - // Then send that to the integrator let t_start = times[end_index - 1]; let t_end = times[end_index]; - self.integrator - .interpolate(t_start, t_end, &self.dense[end_index - 1], t) + let y_start = self.states[end_index - 1]; + let h = t_end - t_start; + + // Check if we need to compute extra stages for lazy dense output + let dense_cell = &self.dense[end_index - 1]; + + if S::EXTRA_STAGES > 0 { + let needs_extra = { + let borrowed = dense_cell.borrow(); + // Dense array format: [y0, y1, k1, k2, ..., k_main] + // If we have main stages only: 2 + MAIN_STAGES elements + // If we have all stages: 2 + MAIN_STAGES + EXTRA_STAGES elements + borrowed.len() < 2 + S::TOTAL_DENSE_STAGES + }; + + if needs_extra { + // Compute extra stages and append to dense output + let mut dense = dense_cell.borrow_mut(); + + // Extract main stages (skip y0 and y1 at indices 0 and 1) + let main_stages = &dense[2..2 + S::MAIN_STAGES]; + + // Compute extra stages lazily + let extra_stages = self.integrator.compute_extra_stages( + self.ode, + t_start, + y_start, + h, + main_stages, + ); + + // Append extra stages to dense output (cached for future interpolations) + dense.extend(extra_stages); + } + } + + // Now interpolate with the (possibly augmented) dense output + let dense = dense_cell.borrow(); + self.integrator.interpolate(t_start, t_end, &dense, t) } } }