Optimize the spirv MHPgram

Decreases time by about 1% (the benchmark might not actually exercise
all of that, though? -- could be largely code-size related)
This commit is contained in:
2022-01-24 22:11:24 -08:00
parent ad655c7df6
commit 0c407eb3c5
4 changed files with 76 additions and 27 deletions

View File

@@ -56,5 +56,5 @@ pub fn bench_step_spirv(c: &mut Criterion) {
// } // }
// } // }
criterion_group!(benches, bench_step, bench_step_spirv); criterion_group!(benches, /*bench_step,*/ bench_step_spirv);
criterion_main!(benches); criterion_main!(benches);

View File

@@ -56,3 +56,19 @@ Driver::step_with_pml/4 time: [4.2738 ms 4.3023 ms 4.3330 ms]
Driver::step_with_pml/8 time: [4.6272 ms 4.6661 ms 4.7082 ms] Driver::step_with_pml/8 time: [4.6272 ms 4.6661 ms 4.7082 ms]
Driver::step_with_pml/16 Driver::step_with_pml/16
time: [4.8251 ms 4.8726 ms 4.9234 ms] time: [4.8251 ms 4.8726 ms 4.9234 ms]
b25aa6f5b3565768c0a6899ecf8652b521353255
Driver::step_spirv/10 time: [257.08 us 258.06 us 259.21 us]
Driver::step_spirv/20 time: [381.89 us 383.11 us 384.34 us]
Driver::step_spirv/40 time: [1.3031 ms 1.3075 ms 1.3127 ms]
Driver::step_spirv/80 time: [12.312 ms 12.329 ms 12.347 ms]
Driver::step_spirv/160 time: [257.39 ms 258.06 ms 258.73 ms]
optimized spirv MHPgram:
Driver::step_spirv/10 time: [252.52 us 253.43 us 254.43 us]
Driver::step_spirv/20 time: [377.44 us 378.57 us 379.75 us]
Driver::step_spirv/40 time: [1.3021 ms 1.3058 ms 1.3100 ms]
Driver::step_spirv/80 time: [12.413 ms 12.430 ms 12.448 ms]
Driver::step_spirv/160 time: [255.50 ms 256.15 ms 256.84 ms]

View File

@@ -135,7 +135,7 @@ impl Into<SpirvMBPgram> for mat::MBPgram<f32> {
impl From<SpirvMHPgram> for mat::MHPgram<f32> { impl From<SpirvMHPgram> for mat::MHPgram<f32> {
fn from(p: SpirvMHPgram) -> Self { fn from(p: SpirvMHPgram) -> Self {
Self::new(p.h_intercept, p.mu_r, p.max_m) Self::new(p.h_intercept(), p.mu_r(), p.max_m)
} }
} }

View File

@@ -57,53 +57,79 @@ impl MBPgram {
#[derive(Copy, Clone, Default, PartialEq)] #[derive(Copy, Clone, Default, PartialEq)]
pub struct MHPgram { pub struct MHPgram {
/// X coordinate at which M is always zero. /// optimized form of mu_0^-1 * (1-mu_r^-1)
pub h_intercept: f32, b_mult: f32,
/// relative mu value along the non-flat edges of the parallelogram. /// optimized form of h_intercept * (1-mu_r^-1)
pub mu_r: f32, m_offset: f32,
/// Vertical range of the graph /// Vertical range of the graph
pub max_m: f32, pub max_m: f32,
} }
impl MHPgram { impl MHPgram {
/// h_intercept: X coordinate at which M is always zero.
/// mu_r: relative mu value along the non-flat edges of the parallelogram.
pub fn new(h_intercept: f32, mu_r: f32, max_m: f32) -> Self { pub fn new(h_intercept: f32, mu_r: f32, max_m: f32) -> Self {
Self { h_intercept, mu_r, max_m } const MU0_INV: f32 = 795774.715025073;
let one_minus_mu_r_inv = 1.0 - 1.0/mu_r;
Self {
b_mult: MU0_INV * one_minus_mu_r_inv,
m_offset: h_intercept * one_minus_mu_r_inv,
max_m,
}
}
pub fn h_intercept(&self) -> f32 {
const MU0_INV: f32 = 795774.715025073;
MU0_INV * self.m_offset / self.b_mult
}
pub fn mu_r(&self) -> f32 {
const MU0: f32 = 1.2566370621219e-06;
let mu_r_inv = 1.0 - MU0*self.b_mult;
1.0 / mu_r_inv
} }
/// Return the new `M` /// Return the new `M`
pub fn move_b(self, m: f32, target_b: f32) -> f32 { pub fn move_b(self, m: f32, target_b: f32) -> f32 {
const MU0_INV: f32 = 795774.715025073;
let target_mh = target_b*MU0_INV;
// The point may exist outside the parallelogram. // The point may exist outside the parallelogram.
// The right slope is defined by: // The right slope is defined by:
// B(H)/mu0 = h_intercept + (h-h_intercept)*mu_r // B(H)/mu0 = h_intercept + (h-h_intercept)*mu_r
// "unoptimized" solution:
// let target_mh = target_b*MU0_INV;
// let h_on_right = (target_mh-self.h_intercept)/self.mu_r + self.h_intercept;
// let m_on_right = target_mh - h_on_right;
// Left: // Left:
// B(H)/mu0 = -h_intercept + (h+h_intercept)*mu_r // B(H)/mu0 = -h_intercept + (h+h_intercept)*mu_r
let h_on_right = (target_mh-self.h_intercept)/self.mu_r + self.h_intercept; // "unoptimized" solution:
let m_on_right = target_mh - h_on_right; // let h_on_left = (target_mh+self.h_intercept)/self.mu_r - self.h_intercept;
let h_on_left = (target_mh+self.h_intercept)/self.mu_r - self.h_intercept; // let m_on_left = target_mh - h_on_left;
let m_on_left = target_mh - h_on_left;
let m_on_right = target_b*self.b_mult - self.m_offset;
let m_on_left = target_b*self.b_mult + self.m_offset;
if m_on_right >= m { if m_on_right >= m {
if m_on_right <= self.max_m { // if m_on_right <= self.max_m {
// rightward edge movement // // rightward edge movement
m_on_right // m_on_right
} else { // } else {
// right of saturation // // right of saturation
self.max_m // self.max_m
} // }
m_on_right.min(self.max_m)
} else if m_on_left <= m { } else if m_on_left <= m {
if m_on_left >= -self.max_m { // if m_on_left >= -self.max_m {
// leftward edge movement // // leftward edge movement
m_on_left // m_on_left
} else { // } else {
// left of saturation // // left of saturation
-self.max_m // -self.max_m
} // }
m_on_left.max(-self.max_m)
} else { } else {
// interior movement // interior movement
m m
} }
// this boils down to:
// m.clamp(m_on_left, m_on_right).clamp(-max_m, max_m)
} }
pub fn move_b_vec(self, m: Vec3Std, target_b: Vec3Std) -> Vec3Std { pub fn move_b_vec(self, m: Vec3Std, target_b: Vec3Std) -> Vec3Std {
@@ -191,6 +217,13 @@ mod test {
assert_eq_approx(curve.move_b(310000.0, -0.25), -198703.0, 1.0); assert_eq_approx(curve.move_b(310000.0, -0.25), -198703.0, 1.0);
} }
#[test]
fn mh_curve_parameters() {
let curve = MHPgram::new(50.0, 101.0, 500.0);
assert_eq_approx(curve.h_intercept(), 50.0, 1e-3);
assert_eq_approx(curve.mu_r(), 101.0, 1e-3);
}
#[test] #[test]
fn mh_curve_edge_travel() { fn mh_curve_edge_travel() {
const MU0: f32 = 1.2566370621219e-06; const MU0: f32 = 1.2566370621219e-06;