driver: optimization: don't memcopy the RenderedStimulus across threads

This commit is contained in:
2022-08-22 01:49:35 -07:00
parent 8268215441
commit 532dd26e22

View File

@@ -277,7 +277,7 @@ where
trace!("step begin"); trace!("step begin");
self.diag.instrument_step(steps_this_time as u64, || { self.diag.instrument_step(steps_this_time as u64, || {
self.state.step_multiple(steps_this_time, &stim); self.state.step_multiple(steps_this_time, &*stim);
}); });
trace!("step end"); trace!("step end");
@@ -451,9 +451,11 @@ struct StimAccess<R, T> {
/// A.K.A. "can i safely do a blocking recv on response_channel". /// A.K.A. "can i safely do a blocking recv on response_channel".
outstanding: Cell<bool>, outstanding: Cell<bool>,
/// data sent from worker thread back to the Driver side. /// data sent from worker thread back to the Driver side.
/// XXX: Boxing isn't necessary, but doing so means much less memcopy'ing over the channel
/// (just one pointer, instead of N^3 bytes). better for perf.
response_channel: ( response_channel: (
SyncSender<(SimMeta<f32>, u64, RenderedStimulus<R>)>, SyncSender<(SimMeta<f32>, u64, Box<RenderedStimulus<R>>)>,
Receiver<(SimMeta<f32>, u64, RenderedStimulus<R>)>, Receiver<(SimMeta<f32>, u64, Box<RenderedStimulus<R>>)>,
), ),
worker: ThreadPool, worker: ThreadPool,
} }
@@ -483,7 +485,7 @@ impl<R, T> StimAccess<R, T> {
/// waits for an outstanding job (if any). /// waits for an outstanding job (if any).
/// if the response matches the request, return the response, /// if the response matches the request, return the response,
/// else discard the response. /// else discard the response.
fn maybe_wait_for_job(&self, meta: SimMeta<f32>, step: u64) -> Option<RenderedStimulus<R>> { fn maybe_wait_for_job(&self, meta: SimMeta<f32>, step: u64) -> Option<Box<RenderedStimulus<R>>> {
if !self.outstanding.get() { if !self.outstanding.get() {
return None; return None;
} }
@@ -501,7 +503,7 @@ impl<R, T> StimAccess<R, T> {
} }
impl<R: Real, T: DriverStimulus<R> + Send + 'static> StimAccess<R, T> { impl<R: Real, T: DriverStimulus<R> + Send + 'static> StimAccess<R, T> {
fn get_for(&self, meta: SimMeta<f32>, step: u64) -> RenderedStimulus<R> { fn get_for(&self, meta: SimMeta<f32>, step: u64) -> Box<RenderedStimulus<R>> {
// either claim the outstanding job (if it exists and matches)... // either claim the outstanding job (if it exists and matches)...
self.maybe_wait_for_job(meta, step).unwrap_or_else(|| { self.maybe_wait_for_job(meta, step).unwrap_or_else(|| {
// or start a job and wait for it to complete inline // or start a job and wait for it to complete inline
@@ -525,13 +527,13 @@ impl<R: Real, T: DriverStimulus<R> + Send + 'static> StimAccess<R, T> {
let rendered = diag.instrument_stimuli(|| { let rendered = diag.instrument_stimuli(|| {
let stim = stim.lock().unwrap(); let stim = stim.lock().unwrap();
let opt = stim.optimized_for(meta, step); let opt = stim.optimized_for(meta, step);
opt.as_ref().rendered( Box::new(opt.as_ref().rendered(
meta.time_step().cast(), meta.time_step().cast(),
// TODO: convert this to an integer // TODO: convert this to an integer
meta.time_step().cast::<R>() * R::from_primitive(step), meta.time_step().cast::<R>() * R::from_primitive(step),
meta.feature_size().cast(), meta.feature_size().cast(),
meta.dim() meta.dim()
).into_owned() ).into_owned())
//^ this 'into_owned' ought to be a no-op. //^ this 'into_owned' ought to be a no-op.
//^ it would only ever be borrowed if we accidentally called `rendered` twice. //^ it would only ever be borrowed if we accidentally called `rendered` twice.
}); });