Optimize the spirv MHPgram
Decreases step time by about 1% (though the benchmark might not actually exercise all of the optimized code; the improvement could be largely code-size related).
This commit is contained in:
@@ -56,5 +56,5 @@ pub fn bench_step_spirv(c: &mut Criterion) {
|
|||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
criterion_group!(benches, bench_step, bench_step_spirv);
|
criterion_group!(benches, /*bench_step,*/ bench_step_spirv);
|
||||||
criterion_main!(benches);
|
criterion_main!(benches);
|
||||||
|
@@ -56,3 +56,19 @@ Driver::step_with_pml/4 time: [4.2738 ms 4.3023 ms 4.3330 ms]
|
|||||||
Driver::step_with_pml/8 time: [4.6272 ms 4.6661 ms 4.7082 ms]
|
Driver::step_with_pml/8 time: [4.6272 ms 4.6661 ms 4.7082 ms]
|
||||||
Driver::step_with_pml/16
|
Driver::step_with_pml/16
|
||||||
time: [4.8251 ms 4.8726 ms 4.9234 ms]
|
time: [4.8251 ms 4.8726 ms 4.9234 ms]
|
||||||
|
|
||||||
|
|
||||||
|
b25aa6f5b3565768c0a6899ecf8652b521353255
|
||||||
|
Driver::step_spirv/10 time: [257.08 us 258.06 us 259.21 us]
|
||||||
|
Driver::step_spirv/20 time: [381.89 us 383.11 us 384.34 us]
|
||||||
|
Driver::step_spirv/40 time: [1.3031 ms 1.3075 ms 1.3127 ms]
|
||||||
|
Driver::step_spirv/80 time: [12.312 ms 12.329 ms 12.347 ms]
|
||||||
|
Driver::step_spirv/160 time: [257.39 ms 258.06 ms 258.73 ms]
|
||||||
|
|
||||||
|
|
||||||
|
optimized spirv MHPgram:
|
||||||
|
Driver::step_spirv/10 time: [252.52 us 253.43 us 254.43 us]
|
||||||
|
Driver::step_spirv/20 time: [377.44 us 378.57 us 379.75 us]
|
||||||
|
Driver::step_spirv/40 time: [1.3021 ms 1.3058 ms 1.3100 ms]
|
||||||
|
Driver::step_spirv/80 time: [12.413 ms 12.430 ms 12.448 ms]
|
||||||
|
Driver::step_spirv/160 time: [255.50 ms 256.15 ms 256.84 ms]
|
||||||
|
@@ -135,7 +135,7 @@ impl Into<SpirvMBPgram> for mat::MBPgram<f32> {
|
|||||||
|
|
||||||
impl From<SpirvMHPgram> for mat::MHPgram<f32> {
|
impl From<SpirvMHPgram> for mat::MHPgram<f32> {
|
||||||
fn from(p: SpirvMHPgram) -> Self {
|
fn from(p: SpirvMHPgram) -> Self {
|
||||||
Self::new(p.h_intercept, p.mu_r, p.max_m)
|
Self::new(p.h_intercept(), p.mu_r(), p.max_m)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -57,53 +57,79 @@ impl MBPgram {
|
|||||||
|
|
||||||
#[derive(Copy, Clone, Default, PartialEq)]
|
#[derive(Copy, Clone, Default, PartialEq)]
|
||||||
pub struct MHPgram {
|
pub struct MHPgram {
|
||||||
/// X coordinate at which M is always zero.
|
/// optimized form of mu_0^-1 * (1-mu_r^-1)
|
||||||
pub h_intercept: f32,
|
b_mult: f32,
|
||||||
/// relative mu value along the non-flat edges of the parallelogram.
|
/// optimized form of h_intercept * (1-mu_r^-1)
|
||||||
pub mu_r: f32,
|
m_offset: f32,
|
||||||
/// Vertical range of the graph
|
/// Vertical range of the graph
|
||||||
pub max_m: f32,
|
pub max_m: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MHPgram {
|
impl MHPgram {
|
||||||
|
/// h_intercept: X coordinate at which M is always zero.
|
||||||
|
/// mu_r: relative mu value along the non-flat edges of the parallelogram.
|
||||||
pub fn new(h_intercept: f32, mu_r: f32, max_m: f32) -> Self {
|
pub fn new(h_intercept: f32, mu_r: f32, max_m: f32) -> Self {
|
||||||
Self { h_intercept, mu_r, max_m }
|
const MU0_INV: f32 = 795774.715025073;
|
||||||
|
let one_minus_mu_r_inv = 1.0 - 1.0/mu_r;
|
||||||
|
Self {
|
||||||
|
b_mult: MU0_INV * one_minus_mu_r_inv,
|
||||||
|
m_offset: h_intercept * one_minus_mu_r_inv,
|
||||||
|
max_m,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn h_intercept(&self) -> f32 {
|
||||||
|
const MU0_INV: f32 = 795774.715025073;
|
||||||
|
MU0_INV * self.m_offset / self.b_mult
|
||||||
|
}
|
||||||
|
pub fn mu_r(&self) -> f32 {
|
||||||
|
const MU0: f32 = 1.2566370621219e-06;
|
||||||
|
let mu_r_inv = 1.0 - MU0*self.b_mult;
|
||||||
|
1.0 / mu_r_inv
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the new `M`
|
/// Return the new `M`
|
||||||
pub fn move_b(self, m: f32, target_b: f32) -> f32 {
|
pub fn move_b(self, m: f32, target_b: f32) -> f32 {
|
||||||
const MU0_INV: f32 = 795774.715025073;
|
|
||||||
let target_mh = target_b*MU0_INV;
|
|
||||||
// The point may exist outside the parallelogram.
|
// The point may exist outside the parallelogram.
|
||||||
// The right slope is defined by:
|
// The right slope is defined by:
|
||||||
// B(H)/mu0 = h_intercept + (h-h_intercept)*mu_r
|
// B(H)/mu0 = h_intercept + (h-h_intercept)*mu_r
|
||||||
|
// "unoptimized" solution:
|
||||||
|
// let target_mh = target_b*MU0_INV;
|
||||||
|
// let h_on_right = (target_mh-self.h_intercept)/self.mu_r + self.h_intercept;
|
||||||
|
// let m_on_right = target_mh - h_on_right;
|
||||||
// Left:
|
// Left:
|
||||||
// B(H)/mu0 = -h_intercept + (h+h_intercept)*mu_r
|
// B(H)/mu0 = -h_intercept + (h+h_intercept)*mu_r
|
||||||
let h_on_right = (target_mh-self.h_intercept)/self.mu_r + self.h_intercept;
|
// "unoptimized" solution:
|
||||||
let m_on_right = target_mh - h_on_right;
|
// let h_on_left = (target_mh+self.h_intercept)/self.mu_r - self.h_intercept;
|
||||||
let h_on_left = (target_mh+self.h_intercept)/self.mu_r - self.h_intercept;
|
// let m_on_left = target_mh - h_on_left;
|
||||||
let m_on_left = target_mh - h_on_left;
|
|
||||||
|
let m_on_right = target_b*self.b_mult - self.m_offset;
|
||||||
|
let m_on_left = target_b*self.b_mult + self.m_offset;
|
||||||
|
|
||||||
if m_on_right >= m {
|
if m_on_right >= m {
|
||||||
if m_on_right <= self.max_m {
|
// if m_on_right <= self.max_m {
|
||||||
// rightward edge movement
|
// // rightward edge movement
|
||||||
m_on_right
|
// m_on_right
|
||||||
} else {
|
// } else {
|
||||||
// right of saturation
|
// // right of saturation
|
||||||
self.max_m
|
// self.max_m
|
||||||
}
|
// }
|
||||||
|
m_on_right.min(self.max_m)
|
||||||
} else if m_on_left <= m {
|
} else if m_on_left <= m {
|
||||||
if m_on_left >= -self.max_m {
|
// if m_on_left >= -self.max_m {
|
||||||
// leftward edge movement
|
// // leftward edge movement
|
||||||
m_on_left
|
// m_on_left
|
||||||
} else {
|
// } else {
|
||||||
// left of saturation
|
// // left of saturation
|
||||||
-self.max_m
|
// -self.max_m
|
||||||
}
|
// }
|
||||||
|
m_on_left.max(-self.max_m)
|
||||||
} else {
|
} else {
|
||||||
// interior movement
|
// interior movement
|
||||||
m
|
m
|
||||||
}
|
}
|
||||||
|
// this boils down to:
|
||||||
|
// m.clamp(m_on_right, m_on_left).clamp(-max_m, max_m)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn move_b_vec(self, m: Vec3Std, target_b: Vec3Std) -> Vec3Std {
|
pub fn move_b_vec(self, m: Vec3Std, target_b: Vec3Std) -> Vec3Std {
|
||||||
@@ -191,6 +217,13 @@ mod test {
|
|||||||
assert_eq_approx(curve.move_b(310000.0, -0.25), -198703.0, 1.0);
|
assert_eq_approx(curve.move_b(310000.0, -0.25), -198703.0, 1.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn mh_curve_parameters() {
|
||||||
|
let curve = MHPgram::new(50.0, 101.0, 500.0);
|
||||||
|
assert_eq_approx(curve.h_intercept(), 50.0, 1e-3);
|
||||||
|
assert_eq_approx(curve.mu_r(), 101.0, 1e-3);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn mh_curve_edge_travel() {
|
fn mh_curve_edge_travel() {
|
||||||
const MU0: f32 = 1.2566370621219e-06;
|
const MU0: f32 = 1.2566370621219e-06;
|
||||||
|
Reference in New Issue
Block a user