From 82af4b100d1471275d5497b2f79c96a76c8c55cc Mon Sep 17 00:00:00 2001 From: colin Date: Mon, 22 Aug 2022 01:07:27 -0700 Subject: [PATCH] driver: optimize the `step_multiple` step count calculation this was using a stupid amount of compute. we still have about 7% time unaccounted for. down from 12-15%. --- crates/coremem/src/driver.rs | 32 ++++++++++++++++++-------------- crates/coremem/src/render.rs | 11 +++++++++++ 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/crates/coremem/src/driver.rs b/crates/coremem/src/driver.rs index 32fcb7b..d6b4e33 100644 --- a/crates/coremem/src/driver.rs +++ b/crates/coremem/src/driver.rs @@ -258,23 +258,26 @@ where self.render(); } - // TODO: optimize. this single block takes as much as 12% of net execution time - let mut can_step = 1; - while can_step < at_most - && !self.renderer.any_work_for_frame(start_step + can_step as u64) - && !self.stimuli.any_work_for_frame(start_step + can_step as u64) - { - can_step += 1; - } + // maybe the renderer or stimulus needs servicing before the max frame the user asked for. + // step less than `at_most`, in that case. + let next_frame_for_user = start_step + at_most as u64; + let next_frame_to_render = self.renderer.next_frame_for_work(start_step); + let next_frame_for_stim = self.stimuli.next_frame_for_work(start_step); + let step_to = [Some(next_frame_for_user), next_frame_to_render, Some(next_frame_for_stim)] + .into_iter() + .flatten() + .min() + .unwrap(); + let steps_this_time = (step_to - start_step).try_into().unwrap(); let meta = self.state.meta(); let stim = self.stimuli.get_for(meta, start_step); // prefetch the next stimulus, in the background. 
- self.stimuli.start_job(meta, start_step + can_step as u64); + self.stimuli.start_job(meta, step_to); trace!("step begin"); - self.diag.instrument_step(can_step as u64, || { - self.state.step_multiple(can_step, &stim); + self.diag.instrument_step(steps_this_time as u64, || { + self.state.step_multiple(steps_this_time, &stim); }); trace!("step end"); @@ -291,7 +294,7 @@ where percent_complete, sim_time, step, diagstr ); } - can_step as u32 + steps_this_time } pub fn step_multiple(&mut self, num_steps: u32) { let mut steps_remaining = num_steps; @@ -475,8 +478,9 @@ impl StimAccess { // with the worker joined, there should be no outstanding handles on the arc. Arc::try_unwrap(self.stim).ok().unwrap().into_inner().unwrap() } - fn any_work_for_frame(&self, frame: u64) -> bool { - frame % self.steps_per_stimulus == 0 + fn next_frame_for_work(&self, after: u64) -> u64 { + let f = after + self.steps_per_stimulus; + f - f % self.steps_per_stimulus } /// used internally. diff --git a/crates/coremem/src/render.rs b/crates/coremem/src/render.rs index b56a033..2d513bd 100644 --- a/crates/coremem/src/render.rs +++ b/crates/coremem/src/render.rs @@ -535,6 +535,14 @@ impl MultiRendererElement { Some(end) => frame < end, } } + fn next_frame_for_work(&self, after: u64) -> Option<u64> { + let max_frame = after + self.step_frequency; + let max_frame = max_frame - max_frame % self.step_frequency; + match self.step_limit { + None => Some(max_frame), + Some(end) => Some(max_frame).filter(|&f| f < end) + } + } } pub struct MultiRenderer { @@ -567,6 +575,9 @@ impl MultiRenderer { pub fn any_work_for_frame(&self, frame: u64) -> bool { self.renderers.read().unwrap().iter().any(|m| m.work_this_frame(frame)) } + pub fn next_frame_for_work(&self, after: u64) -> Option<u64> { + self.renderers.read().unwrap().iter().flat_map(|m| m.next_frame_for_work(after)).min() + } } impl Renderer for MultiRenderer {