diff --git a/crates/coremem/src/sim/spirv/mod.rs b/crates/coremem/src/sim/spirv/mod.rs index 64e06df..1e2c3b7 100644 --- a/crates/coremem/src/sim/spirv/mod.rs +++ b/crates/coremem/src/sim/spirv/mod.rs @@ -1,4 +1,3 @@ -use ndarray::{self, Array3}; use serde::{Deserialize, Serialize}; use log::{info, trace, warn}; @@ -7,9 +6,10 @@ use crate::geom::{Coord, Index}; use crate::real::Real; use crate::sim::{AbstractSim, Fields, GenericSim, StaticSim}; use crate::stim::AbstractStimulus; -use crate::cross::vec::Vec3; +use coremem_cross::dim::OffsetDimSlice; use coremem_cross::mat::{FullyGenericMaterial, Material}; use coremem_cross::step::SimMeta; +use coremem_cross::vec::{Vec3, Vec3u}; mod cpu; mod gpu; @@ -104,8 +104,8 @@ where self.backend.step_n( self.meta, self.mat.as_slice(), - stim_e.as_slice().unwrap(), - stim_h.as_slice().unwrap(), + &*stim_e, + &*stim_h, self.e.as_mut_slice(), self.h.as_mut_slice(), self.m.as_mut_slice(), @@ -225,32 +225,43 @@ where } fn eval_stimulus(&self, stim: &S) - -> (Array3>, Array3>) + -> (Vec>, Vec>) { trace!("eval_stimulus begin"); let (e, h) = self.diag.instrument_stimuli(|| { let dim = self.size(); + let dim_len = dim.product_sum_usize(); let feature_size = self.feature_size(); let t_sec = self.time(); let timestep = self.meta.time_step; - // TODO(perf): do this in one loop! - let e = ndarray::Zip::from(ndarray::indices( - [dim.z() as usize, dim.y() as usize, dim.x() as usize] - )).par_map_collect(|(z, y, x)| { - let pos_idx = Index::new(x as _, y as _, z as _); - let pos_meters = pos_idx.to_meters(feature_size); - let densities = stim.at(t_sec, pos_meters); - densities.e.cast::() * timestep - }); - let h = ndarray::Zip::from(ndarray::indices( - [dim.z() as usize, dim.y() as usize, dim.x() as usize] - )).par_map_collect(|(z, y, x)| { - let pos_idx = Index::new(x as _, y as _, z as _); - let pos_meters = pos_idx.to_meters(feature_size); - let densities = stim.at(t_sec, pos_meters); - densities.h.cast::() * timestep + // we'll evaluate in parallel each row (const y/z) of the stimulus. + let mut backing = Vec::new(); + backing.resize_with(dim_len, Default::default); + + rayon::scope(|s| { + let mut undispatched_backing = &mut backing[..]; + for z in 0..dim.z() { + for y in 0..dim.y() { + let this_slice; + (this_slice, undispatched_backing) = undispatched_backing.split_at_mut(dim.x() as usize); + let view = OffsetDimSlice::new(Vec3u::new(0, y, z), Vec3u::new(this_slice.len() as u32, 1, 1), this_slice); + s.spawn(move |_| stim.eval_into(t_sec, feature_size, view)); + } + } }); + + // unpack the E and H portions of the stimulus + let mut e = Vec::new(); + e.reserve(dim_len); + let mut h = Vec::new(); + h.reserve(dim_len); + + for field in backing { + e.push(field.e.cast::() * timestep); + h.push(field.h.cast::() * timestep); + } + (e, h) }); trace!("eval_stimulus end"); diff --git a/crates/coremem/src/stim.rs b/crates/coremem/src/stim.rs index ebc8e0b..dc7ca2a 100644 --- a/crates/coremem/src/stim.rs +++ b/crates/coremem/src/stim.rs @@ -59,14 +59,41 @@ impl Visitor<&S> for &mut StimulusEvaluator { } } +struct StimEvalInto<'a> { + into: OffsetDimSlice<&'a mut [Fields]>, + t_sec: f32, + feat_size: f32, +} + +impl<'a, S: AbstractStimulus> Visitor<&S> for StimEvalInto<'a> { + fn visit(&mut self, next: &S) { + next.eval_into(self.t_sec, self.feat_size, self.into.as_mut()); + } +} + +// impl AbstractStimulus for L +// where +// for<'a, 'b> &'a L: Visit<&'b mut StimulusEvaluator>, +// { +// fn at(&self, t_sec: f32, pos: Meters) -> Fields { +// let mut ev = StimulusEvaluator { t_sec, pos, fields: Fields::default()}; +// self.visit(&mut ev); +// ev.fields +// } +// } + impl AbstractStimulus for L where - for<'a, 'b> &'a L: Visit<&'b mut StimulusEvaluator>, + for<'a, 'b> &'a L: Visit>, { - fn at(&self, t_sec: f32, pos: Meters) -> Fields { - let mut ev = StimulusEvaluator { t_sec, pos, fields: Fields::default()}; - self.visit(&mut ev); - ev.fields + fn at(&self, _t_sec: f32, _pos: Meters) -> Fields { + // TODO: if we replace `_pos` with `feat_size`, `Index`, we can replace this with a generic + // impl that calls eval_into one a one-len slice. + unimplemented!(); + } + fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) { + let ev = StimEvalInto { t_sec, feat_size, into }; + self.visit(ev); } } @@ -82,6 +109,12 @@ where pub struct DynStimuli(Vec>); impl DynStimuli { + pub fn new() -> Self { + Self::default() + } + pub fn from_vec(stim: Vec>) -> Self { + Self(stim) + } pub fn push(&mut self, a: Box) { self.0.push(a) } @@ -91,6 +124,11 @@ impl AbstractStimulus for DynStimuli { self.0.iter().map(|i| i.at(t_sec, pos)) .fold(Fields::default(), core::ops::Add::add) } + fn eval_into(&self, t_sec: f32, feat_size: f32, mut into: OffsetDimSlice<&mut [Fields]>) { + for i in &self.0 { + i.eval_into(t_sec, feat_size, into.as_mut()); + } + } } // // impl AbstractStimulus for Box { @@ -405,6 +443,11 @@ impl AbstractStimulus for Gated { Default::default() } } + fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) { + if (self.start..self.end).contains(&t_sec) { + self.inner.eval_into(t_sec, feat_size, into); + } + } } #[derive(Clone)] @@ -436,6 +479,9 @@ impl AbstractStimulus for Shifted { fn at(&self, t_sec: f32, pos: Meters) -> Fields { self.inner.at(t_sec - self.start_at, pos) } + fn eval_into(&self, t_sec: f32, feat_size: f32, into: OffsetDimSlice<&mut [Fields]>) { + self.inner.eval_into(t_sec - self.start_at, feat_size, into) + } } #[cfg(test)]