eval_into: remove the scale parameter
this actually seems to drop perf from 637 -> 595 ish? i suppose the compiler was able to fold the time multiplier in with the scale multiplier? idk, somewhat surprised.
This commit is contained in:
@@ -233,7 +233,7 @@ where
|
|||||||
let dim_len = dim.product_sum_usize();
|
let dim_len = dim.product_sum_usize();
|
||||||
let feature_size = self.feature_size();
|
let feature_size = self.feature_size();
|
||||||
let t_sec = self.time();
|
let t_sec = self.time();
|
||||||
let timestep = self.meta.time_step.cast();
|
let timestep = self.meta.time_step;
|
||||||
|
|
||||||
// we'll evaluate in parallel each row (const y/z) of the stimulus.
|
// we'll evaluate in parallel each row (const y/z) of the stimulus.
|
||||||
let mut backing = Vec::new();
|
let mut backing = Vec::new();
|
||||||
@@ -246,7 +246,7 @@ where
|
|||||||
let this_slice;
|
let this_slice;
|
||||||
(this_slice, undispatched_backing) = undispatched_backing.split_at_mut(dim.x() as usize);
|
(this_slice, undispatched_backing) = undispatched_backing.split_at_mut(dim.x() as usize);
|
||||||
let view = OffsetDimSlice::new(Vec3u::new(0, y, z), Vec3u::new(this_slice.len() as u32, 1, 1), this_slice);
|
let view = OffsetDimSlice::new(Vec3u::new(0, y, z), Vec3u::new(this_slice.len() as u32, 1, 1), this_slice);
|
||||||
s.spawn(move |_| stim.eval_into(t_sec, feature_size, timestep, view));
|
s.spawn(move |_| stim.eval_into(t_sec, feature_size, view));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -258,8 +258,8 @@ where
|
|||||||
h.reserve(dim_len);
|
h.reserve(dim_len);
|
||||||
|
|
||||||
for field in backing {
|
for field in backing {
|
||||||
e.push(field.e.cast::<R>());
|
e.push(field.e.cast::<R>() * timestep);
|
||||||
h.push(field.h.cast::<R>());
|
h.push(field.h.cast::<R>() * timestep);
|
||||||
}
|
}
|
||||||
|
|
||||||
(e, h)
|
(e, h)
|
||||||
|
@@ -113,7 +113,7 @@ pub trait Stimulus: Sync {
|
|||||||
/// Return the (E, H) field which should be added PER-SECOND to the provided position/time.
|
/// Return the (E, H) field which should be added PER-SECOND to the provided position/time.
|
||||||
fn at(&self, t_sec: f32, feat_size: f32, loc: Index) -> Fields {
|
fn at(&self, t_sec: f32, feat_size: f32, loc: Index) -> Fields {
|
||||||
let mut dat = OffsetDimSlice::new(loc.into(), Vec3u::new(1, 1, 1), [Fields::default()]);
|
let mut dat = OffsetDimSlice::new(loc.into(), Vec3u::new(1, 1, 1), [Fields::default()]);
|
||||||
self.eval_into(t_sec, feat_size, 1.0, dat.as_mut());
|
self.eval_into(t_sec, feat_size, dat.as_mut());
|
||||||
let [fields] = dat.into_inner();
|
let [fields] = dat.into_inner();
|
||||||
fields
|
fields
|
||||||
}
|
}
|
||||||
@@ -121,9 +121,9 @@ pub trait Stimulus: Sync {
|
|||||||
// TODO: could remove the `scale` param if we parameterized this array over some `F: FnMut(Fields)` instead `Fields`.
|
// TODO: could remove the `scale` param if we parameterized this array over some `F: FnMut(Fields)` instead `Fields`.
|
||||||
// that would also allow easier unzipping in the SpirvSim code.
|
// that would also allow easier unzipping in the SpirvSim code.
|
||||||
/// bulk version of at. evaluate several positions at once and populate the output region.
|
/// bulk version of at. evaluate several positions at once and populate the output region.
|
||||||
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, into: OffsetDimSlice<&mut [Fields]>) {
|
fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) {
|
||||||
for (idx, out) in into.enumerated() {
|
for (idx, out) in into.enumerated() {
|
||||||
*out += self.at(t_sec, feat_size, Index::from(idx)) * scale;
|
*out += self.at(t_sec, feat_size, Index::from(idx));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -214,12 +214,11 @@ struct StimEvalInto<'a> {
|
|||||||
into: OffsetDimSlice<&'a mut [Fields]>,
|
into: OffsetDimSlice<&'a mut [Fields]>,
|
||||||
t_sec: f32,
|
t_sec: f32,
|
||||||
feat_size: f32,
|
feat_size: f32,
|
||||||
scale: f32,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, S: Stimulus> Visitor<&S> for StimEvalInto<'a> {
|
impl<'a, S: Stimulus> Visitor<&S> for StimEvalInto<'a> {
|
||||||
fn visit(&mut self, next: &S) {
|
fn visit(&mut self, next: &S) {
|
||||||
next.eval_into(self.t_sec, self.feat_size, self.scale, self.into.as_mut());
|
next.eval_into(self.t_sec, self.feat_size, self.into.as_mut());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -227,8 +226,8 @@ impl<L: Sync> Stimulus for L
|
|||||||
where
|
where
|
||||||
for<'a, 'b> &'a L: Visit<StimEvalInto<'b>>,
|
for<'a, 'b> &'a L: Visit<StimEvalInto<'b>>,
|
||||||
{
|
{
|
||||||
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, into: OffsetDimSlice<&mut [Fields]>) {
|
fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) {
|
||||||
let ev = StimEvalInto { t_sec, feat_size, scale, into };
|
let ev = StimEvalInto { t_sec, feat_size, into };
|
||||||
self.visit(ev);
|
self.visit(ev);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -274,9 +273,9 @@ impl<S: Stimulus> Stimulus for StimuliVec<S> {
|
|||||||
self.0.iter().map(|i| i.at(t_sec, feat_size, loc))
|
self.0.iter().map(|i| i.at(t_sec, feat_size, loc))
|
||||||
.fold(Fields::default(), core::ops::Add::add)
|
.fold(Fields::default(), core::ops::Add::add)
|
||||||
}
|
}
|
||||||
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, mut into: OffsetDimSlice<&mut [Fields]>) {
|
fn eval_into(&self, t_sec: f32, feat_size: f32, mut into: OffsetDimSlice<&mut [Fields]>) {
|
||||||
for i in &self.0 {
|
for i in &self.0 {
|
||||||
i.eval_into(t_sec, feat_size, scale, into.as_mut());
|
i.eval_into(t_sec, feat_size, into.as_mut());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -558,9 +557,9 @@ impl<T: Stimulus> Stimulus for Gated<T> {
|
|||||||
Default::default()
|
Default::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, into: OffsetDimSlice<&mut [Fields]>) {
|
fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) {
|
||||||
if self.active.contains(t_sec) {
|
if self.active.contains(t_sec) {
|
||||||
self.inner.eval_into(t_sec, feat_size, scale, into);
|
self.inner.eval_into(t_sec, feat_size, into);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -587,8 +586,8 @@ impl<T: Stimulus> Stimulus for Shifted<T> {
|
|||||||
fn at(&self, t_sec: f32, feat_size: f32, loc: Index) -> Fields {
|
fn at(&self, t_sec: f32, feat_size: f32, loc: Index) -> Fields {
|
||||||
self.inner.at(t_sec - self.start_at, feat_size, loc)
|
self.inner.at(t_sec - self.start_at, feat_size, loc)
|
||||||
}
|
}
|
||||||
fn eval_into(&self, t_sec: f32, feat_size: f32, scale: f32, into: OffsetDimSlice<&mut [Fields]>) {
|
fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) {
|
||||||
self.inner.eval_into(t_sec - self.start_at, feat_size, scale, into)
|
self.inner.eval_into(t_sec - self.start_at, feat_size, into)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user