eval_into: remove the scale parameter

this actually seems to drop perf from 637 -> 595 ish?

i suppose the compiler was able to fold the time multiplier in with the
scale multiplier? idk, somewhat surprised.
This commit is contained in:
2022-08-19 04:26:58 -07:00
parent ad5f064584
commit 8df001773f
2 changed files with 16 additions and 17 deletions

View File

@@ -233,7 +233,7 @@ where
let dim_len = dim.product_sum_usize(); let dim_len = dim.product_sum_usize();
let feature_size = self.feature_size(); let feature_size = self.feature_size();
let t_sec = self.time(); let t_sec = self.time();
let timestep = self.meta.time_step.cast(); let timestep = self.meta.time_step;
// we'll evaluate in parallel each row (const y/z) of the stimulus. // we'll evaluate in parallel each row (const y/z) of the stimulus.
let mut backing = Vec::new(); let mut backing = Vec::new();
@@ -246,7 +246,7 @@ where
let this_slice; let this_slice;
(this_slice, undispatched_backing) = undispatched_backing.split_at_mut(dim.x() as usize); (this_slice, undispatched_backing) = undispatched_backing.split_at_mut(dim.x() as usize);
let view = OffsetDimSlice::new(Vec3u::new(0, y, z), Vec3u::new(this_slice.len() as u32, 1, 1), this_slice); let view = OffsetDimSlice::new(Vec3u::new(0, y, z), Vec3u::new(this_slice.len() as u32, 1, 1), this_slice);
s.spawn(move |_| stim.eval_into(t_sec, feature_size, timestep, view)); s.spawn(move |_| stim.eval_into(t_sec, feature_size, view));
} }
} }
}); });
@@ -258,8 +258,8 @@ where
h.reserve(dim_len); h.reserve(dim_len);
for field in backing { for field in backing {
e.push(field.e.cast::<R>()); e.push(field.e.cast::<R>() * timestep);
h.push(field.h.cast::<R>()); h.push(field.h.cast::<R>() * timestep);
} }
(e, h) (e, h)

View File

@@ -113,7 +113,7 @@ pub trait Stimulus: Sync {
/// Return the (E, H) field which should be added PER-SECOND to the provided position/time. /// Return the (E, H) field which should be added PER-SECOND to the provided position/time.
fn at(&self, t_sec: f32, feat_size: f32, loc: Index) -> Fields { fn at(&self, t_sec: f32, feat_size: f32, loc: Index) -> Fields {
let mut dat = OffsetDimSlice::new(loc.into(), Vec3u::new(1, 1, 1), [Fields::default()]); let mut dat = OffsetDimSlice::new(loc.into(), Vec3u::new(1, 1, 1), [Fields::default()]);
self.eval_into(t_sec, feat_size, 1.0, dat.as_mut()); self.eval_into(t_sec, feat_size, dat.as_mut());
let [fields] = dat.into_inner(); let [fields] = dat.into_inner();
fields fields
} }
@@ -121,9 +121,9 @@ pub trait Stimulus: Sync {
// TODO: could remove the `scale` param if we parameterized this array over some `F: FnMut(Fields)` instead of `Fields`. // TODO: could remove the `scale` param if we parameterized this array over some `F: FnMut(Fields)` instead of `Fields`.
// that would also allow easier unzipping in the SpirvSim code. // that would also allow easier unzipping in the SpirvSim code.
/// bulk version of at. evaluate several positions at once and populate the output region. /// bulk version of at. evaluate several positions at once and populate the output region.
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, into: OffsetDimSlice<&mut [Fields]>) { fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) {
for (idx, out) in into.enumerated() { for (idx, out) in into.enumerated() {
*out += self.at(t_sec, feat_size, Index::from(idx)) * scale; *out += self.at(t_sec, feat_size, Index::from(idx));
} }
} }
} }
@@ -214,12 +214,11 @@ struct StimEvalInto<'a> {
into: OffsetDimSlice<&'a mut [Fields]>, into: OffsetDimSlice<&'a mut [Fields]>,
t_sec: f32, t_sec: f32,
feat_size: f32, feat_size: f32,
scale: f32,
} }
impl<'a, S: Stimulus> Visitor<&S> for StimEvalInto<'a> { impl<'a, S: Stimulus> Visitor<&S> for StimEvalInto<'a> {
fn visit(&mut self, next: &S) { fn visit(&mut self, next: &S) {
next.eval_into(self.t_sec, self.feat_size, self.scale, self.into.as_mut()); next.eval_into(self.t_sec, self.feat_size, self.into.as_mut());
} }
} }
@@ -227,8 +226,8 @@ impl<L: Sync> Stimulus for L
where where
for<'a, 'b> &'a L: Visit<StimEvalInto<'b>>, for<'a, 'b> &'a L: Visit<StimEvalInto<'b>>,
{ {
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, into: OffsetDimSlice<&mut [Fields]>) { fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) {
let ev = StimEvalInto { t_sec, feat_size, scale, into }; let ev = StimEvalInto { t_sec, feat_size, into };
self.visit(ev); self.visit(ev);
} }
} }
@@ -274,9 +273,9 @@ impl<S: Stimulus> Stimulus for StimuliVec<S> {
self.0.iter().map(|i| i.at(t_sec, feat_size, loc)) self.0.iter().map(|i| i.at(t_sec, feat_size, loc))
.fold(Fields::default(), core::ops::Add::add) .fold(Fields::default(), core::ops::Add::add)
} }
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, mut into: OffsetDimSlice<&mut [Fields]>) { fn eval_into(&self, t_sec: f32, feat_size: f32, mut into: OffsetDimSlice<&mut [Fields]>) {
for i in &self.0 { for i in &self.0 {
i.eval_into(t_sec, feat_size, scale, into.as_mut()); i.eval_into(t_sec, feat_size, into.as_mut());
} }
} }
} }
@@ -558,9 +557,9 @@ impl<T: Stimulus> Stimulus for Gated<T> {
Default::default() Default::default()
} }
} }
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, into: OffsetDimSlice<&mut [Fields]>) { fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) {
if self.active.contains(t_sec) { if self.active.contains(t_sec) {
self.inner.eval_into(t_sec, feat_size, scale, into); self.inner.eval_into(t_sec, feat_size, into);
} }
} }
} }
@@ -587,8 +586,8 @@ impl<T: Stimulus> Stimulus for Shifted<T> {
fn at(&self, t_sec: f32, feat_size: f32, loc: Index) -> Fields { fn at(&self, t_sec: f32, feat_size: f32, loc: Index) -> Fields {
self.inner.at(t_sec - self.start_at, feat_size, loc) self.inner.at(t_sec - self.start_at, feat_size, loc)
} }
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, into: OffsetDimSlice<&mut [Fields]>) { fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) {
self.inner.eval_into(t_sec - self.start_at, feat_size, scale, into) self.inner.eval_into(t_sec - self.start_at, feat_size, into)
} }
} }