driver: optimize the step_multiple
step count calculation
This was using an excessive amount of compute. About 7% of execution time is still unaccounted for, down from 12-15%.
This commit is contained in:
@@ -258,23 +258,26 @@ where
|
|||||||
self.render();
|
self.render();
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: optimize. this single block takes as much as 12% of net execution time
|
// maybe the renderer or stimulus needs servicing before the max frame the user asked for.
|
||||||
let mut can_step = 1;
|
// step less than `at_most`, in that case.
|
||||||
while can_step < at_most
|
let next_frame_for_user = start_step + at_most as u64;
|
||||||
&& !self.renderer.any_work_for_frame(start_step + can_step as u64)
|
let next_frame_to_render = self.renderer.next_frame_for_work(start_step);
|
||||||
&& !self.stimuli.any_work_for_frame(start_step + can_step as u64)
|
let next_frame_for_stim = self.stimuli.next_frame_for_work(start_step);
|
||||||
{
|
let step_to = [Some(next_frame_for_user), next_frame_to_render, Some(next_frame_for_stim)]
|
||||||
can_step += 1;
|
.into_iter()
|
||||||
}
|
.flatten()
|
||||||
|
.min()
|
||||||
|
.unwrap();
|
||||||
|
let steps_this_time = (step_to - start_step).try_into().unwrap();
|
||||||
|
|
||||||
let meta = self.state.meta();
|
let meta = self.state.meta();
|
||||||
let stim = self.stimuli.get_for(meta, start_step);
|
let stim = self.stimuli.get_for(meta, start_step);
|
||||||
// prefetch the next stimulus, in the background.
|
// prefetch the next stimulus, in the background.
|
||||||
self.stimuli.start_job(meta, start_step + can_step as u64);
|
self.stimuli.start_job(meta, step_to);
|
||||||
|
|
||||||
trace!("step begin");
|
trace!("step begin");
|
||||||
self.diag.instrument_step(can_step as u64, || {
|
self.diag.instrument_step(steps_this_time as u64, || {
|
||||||
self.state.step_multiple(can_step, &stim);
|
self.state.step_multiple(steps_this_time, &stim);
|
||||||
});
|
});
|
||||||
trace!("step end");
|
trace!("step end");
|
||||||
|
|
||||||
@@ -291,7 +294,7 @@ where
|
|||||||
percent_complete, sim_time, step, diagstr
|
percent_complete, sim_time, step, diagstr
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
can_step as u32
|
steps_this_time
|
||||||
}
|
}
|
||||||
pub fn step_multiple(&mut self, num_steps: u32) {
|
pub fn step_multiple(&mut self, num_steps: u32) {
|
||||||
let mut steps_remaining = num_steps;
|
let mut steps_remaining = num_steps;
|
||||||
@@ -475,8 +478,9 @@ impl<R, T> StimAccess<R, T> {
|
|||||||
// with the worker joined, there should be no outstanding handles on the arc.
|
// with the worker joined, there should be no outstanding handles on the arc.
|
||||||
Arc::try_unwrap(self.stim).ok().unwrap().into_inner().unwrap()
|
Arc::try_unwrap(self.stim).ok().unwrap().into_inner().unwrap()
|
||||||
}
|
}
|
||||||
fn any_work_for_frame(&self, frame: u64) -> bool {
|
fn next_frame_for_work(&self, after: u64) -> u64 {
|
||||||
frame % self.steps_per_stimulus == 0
|
let f = after + self.steps_per_stimulus;
|
||||||
|
f - f % self.steps_per_stimulus
|
||||||
}
|
}
|
||||||
|
|
||||||
/// used internally.
|
/// used internally.
|
||||||
|
@@ -535,6 +535,14 @@ impl<S> MultiRendererElement<S> {
|
|||||||
Some(end) => frame < end,
|
Some(end) => frame < end,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fn next_frame_for_work(&self, after: u64) -> Option<u64> {
|
||||||
|
let max_frame = after + self.step_frequency;
|
||||||
|
let max_frame = max_frame - max_frame % self.step_frequency;
|
||||||
|
match self.step_limit {
|
||||||
|
None => Some(max_frame),
|
||||||
|
Some(end) => Some(max_frame).filter(|&f| f < end)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct MultiRenderer<S> {
|
pub struct MultiRenderer<S> {
|
||||||
@@ -567,6 +575,9 @@ impl<S> MultiRenderer<S> {
|
|||||||
pub fn any_work_for_frame(&self, frame: u64) -> bool {
|
pub fn any_work_for_frame(&self, frame: u64) -> bool {
|
||||||
self.renderers.read().unwrap().iter().any(|m| m.work_this_frame(frame))
|
self.renderers.read().unwrap().iter().any(|m| m.work_this_frame(frame))
|
||||||
}
|
}
|
||||||
|
pub fn next_frame_for_work(&self, after: u64) -> Option<u64> {
|
||||||
|
self.renderers.read().unwrap().iter().flat_map(|m| m.next_frame_for_work(after)).min()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<S: AbstractSim> Renderer<S> for MultiRenderer<S> {
|
impl<S: AbstractSim> Renderer<S> for MultiRenderer<S> {
|
||||||
|
Reference in New Issue
Block a user