diff --git a/benches/driver.rs b/benches/driver.rs index 238532b..2aea233 100644 --- a/benches/driver.rs +++ b/benches/driver.rs @@ -56,5 +56,5 @@ pub fn bench_step_spirv(c: &mut Criterion) { // } // } -criterion_group!(benches, bench_step, bench_step_spirv); +criterion_group!(benches, /*bench_step,*/ bench_step_spirv); criterion_main!(benches); diff --git a/benches/history.txt b/benches/history.txt index f51cdc6..94dd71c 100644 --- a/benches/history.txt +++ b/benches/history.txt @@ -56,3 +56,19 @@ Driver::step_with_pml/4 time: [4.2738 ms 4.3023 ms 4.3330 ms] Driver::step_with_pml/8 time: [4.6272 ms 4.6661 ms 4.7082 ms] Driver::step_with_pml/16 time: [4.8251 ms 4.8726 ms 4.9234 ms] + + +b25aa6f5b3565768c0a6899ecf8652b521353255 +Driver::step_spirv/10 time: [257.08 us 258.06 us 259.21 us] +Driver::step_spirv/20 time: [381.89 us 383.11 us 384.34 us] +Driver::step_spirv/40 time: [1.3031 ms 1.3075 ms 1.3127 ms] +Driver::step_spirv/80 time: [12.312 ms 12.329 ms 12.347 ms] +Driver::step_spirv/160 time: [257.39 ms 258.06 ms 258.73 ms] + + +optimized spirv MHPgram: +Driver::step_spirv/10 time: [252.52 us 253.43 us 254.43 us] +Driver::step_spirv/20 time: [377.44 us 378.57 us 379.75 us] +Driver::step_spirv/40 time: [1.3021 ms 1.3058 ms 1.3100 ms] +Driver::step_spirv/80 time: [12.413 ms 12.430 ms 12.448 ms] +Driver::step_spirv/160 time: [255.50 ms 256.15 ms 256.84 ms] diff --git a/src/sim/spirv.rs b/src/sim/spirv.rs index 115a9c4..09d2011 100644 --- a/src/sim/spirv.rs +++ b/src/sim/spirv.rs @@ -135,7 +135,7 @@ impl Into for mat::MBPgram { impl From for mat::MHPgram { fn from(p: SpirvMHPgram) -> Self { - Self::new(p.h_intercept, p.mu_r, p.max_m) + Self::new(p.h_intercept(), p.mu_r(), p.max_m) } } diff --git a/src/sim/spirv_backend/src/mat.rs b/src/sim/spirv_backend/src/mat.rs index 8e3c020..2ca670b 100644 --- a/src/sim/spirv_backend/src/mat.rs +++ b/src/sim/spirv_backend/src/mat.rs @@ -57,53 +57,79 @@ impl MBPgram { #[derive(Copy, Clone, Default, PartialEq)] pub struct MHPgram { - /// X coordinate at which M is always zero. - pub h_intercept: f32, - /// relative mu value along the non-flat edges of the parallelogram. - pub mu_r: f32, + /// optimized form of mu_0^-1 * (1-mu_r^-1) + b_mult: f32, + /// optimized form of h_intercept * (1-mu_r^-1) + m_offset: f32, /// Vertical range of the graph pub max_m: f32, } impl MHPgram { + /// h_intercept: X coordinate at which M is always zero. + /// mu_r: relative mu value along the non-flat edges of the parallelogram. pub fn new(h_intercept: f32, mu_r: f32, max_m: f32) -> Self { - Self { h_intercept, mu_r, max_m } + const MU0_INV: f32 = 795774.715025073; + let one_minus_mu_r_inv = 1.0 - 1.0/mu_r; + Self { + b_mult: MU0_INV * one_minus_mu_r_inv, + m_offset: h_intercept * one_minus_mu_r_inv, + max_m, + } + } + + pub fn h_intercept(&self) -> f32 { + const MU0_INV: f32 = 795774.715025073; + MU0_INV * self.m_offset / self.b_mult + } + pub fn mu_r(&self) -> f32 { + const MU0: f32 = 1.2566370621219e-06; + let mu_r_inv = 1.0 - MU0*self.b_mult; + 1.0 / mu_r_inv } /// Return the new `M` pub fn move_b(self, m: f32, target_b: f32) -> f32 { - const MU0_INV: f32 = 795774.715025073; - let target_mh = target_b*MU0_INV; // The point may exist outside the parallelogram. // The right slope is defined by: // B(H)/mu0 = h_intercept + (h-h_intercept)*mu_r + // "unoptimized" solution: + // let target_mh = target_b*MU0_INV; + // let h_on_right = (target_mh-self.h_intercept)/self.mu_r + self.h_intercept; + // let m_on_right = target_mh - h_on_right; // Left: // B(H)/mu0 = -h_intercept + (h+h_intercept)*mu_r - let h_on_right = (target_mh-self.h_intercept)/self.mu_r + self.h_intercept; - let m_on_right = target_mh - h_on_right; - let h_on_left = (target_mh+self.h_intercept)/self.mu_r - self.h_intercept; - let m_on_left = target_mh - h_on_left; + // "unoptimized" solution: + // let h_on_left = (target_mh+self.h_intercept)/self.mu_r - self.h_intercept; + // let m_on_left = target_mh - h_on_left; + + let m_on_right = target_b*self.b_mult - self.m_offset; + let m_on_left = target_b*self.b_mult + self.m_offset; if m_on_right >= m { - if m_on_right <= self.max_m { - // rightward edge movement - m_on_right - } else { - // right of saturation - self.max_m - } + // if m_on_right <= self.max_m { + // // rightward edge movement + // m_on_right + // } else { + // // right of saturation + // self.max_m + // } + m_on_right.min(self.max_m) } else if m_on_left <= m { - if m_on_left >= -self.max_m { - // leftward edge movement - m_on_left - } else { - // left of saturation - -self.max_m - } + // if m_on_left >= -self.max_m { + // // leftward edge movement + // m_on_left + // } else { + // // left of saturation + // -self.max_m + // } + m_on_left.max(-self.max_m) } else { // interior movement m } + // this boils down to: + // m.clamp(m_on_left, m_on_right).clamp(-max_m, max_m) } pub fn move_b_vec(self, m: Vec3Std, target_b: Vec3Std) -> Vec3Std { @@ -191,6 +217,13 @@ mod test { assert_eq_approx(curve.move_b(310000.0, -0.25), -198703.0, 1.0); } + #[test] + fn mh_curve_parameters() { + let curve = MHPgram::new(50.0, 101.0, 500.0); + assert_eq_approx(curve.h_intercept(), 50.0, 1e-3); + assert_eq_approx(curve.mu_r(), 101.0, 1e-3); + } + #[test] fn mh_curve_edge_travel() { const MU0: f32 = 1.2566370621219e-06;