diagnostics: instrument the stimulus and stimulus blocked time

2022-08-21 18:10:09 -07:00 · 2022-08-21 18:10:09 -07:00 · a38734a1ed
commit a38734a1ed
parent 18dd66530a
2 changed files with 38 additions and 19 deletions
--- a/crates/coremem/src/diagnostics.rs
+++ b/crates/coremem/src/diagnostics.rs
@ -8,8 +8,9 @@ pub struct Diagnostics {
    time_spent_stepping: Duration,
    time_spent_on_stimuli: Duration,
    time_spent_prepping_render: Duration,
-    time_spent_blocked_on_render: Duration,
    time_spent_rendering: Duration,
+    time_spent_blocked_on_stim: Duration,
+    time_spent_blocked_on_render: Duration,
    start_time: Instant,
 }

@ -29,8 +30,9 @@ impl Diagnostics {
            time_spent_stepping: Default::default(),
            time_spent_on_stimuli: Default::default(),
            time_spent_prepping_render: Default::default(),
-            time_spent_blocked_on_render: Default::default(),
            time_spent_rendering: Default::default(),
+            time_spent_blocked_on_stim: Default::default(),
+            time_spent_blocked_on_render: Default::default(),
            start_time: Instant::now(),
        }
    }
@ -40,17 +42,19 @@ impl Diagnostics {
        let stim_time = self.time_spent_on_stimuli.as_secs_f64();
        let render_time = self.time_spent_rendering.as_secs_f64();
        let render_prep_time = self.time_spent_prepping_render.as_secs_f64();
-        let block_time = self.time_spent_blocked_on_render.as_secs_f64();
+        let stim_block_time = self.time_spent_blocked_on_stim.as_secs_f64();
+        let render_block_time = self.time_spent_blocked_on_render.as_secs_f64();
        let overall_time = self.start_time.elapsed().as_secs_f64();
        let fps = (self.frames_completed as f64) / overall_time;
-        format!("fps: {:6.2} (sim: {:.1}s, stim: {:.1}s, [render: {:.1}s], blocked: {:.1}s, render_prep: {:.1}s, other: {:.1}s)",
+        format!("fps: {:6.2} (step: {:.1}s, [stim: {:.1}s render: {:.1}s], blocked: (stim: {:.1}s render: {:.1}s), render_prep: {:.1}s, other: {:.1}s)",
            fps,
            step_time,
            stim_time,
            render_time,
-            block_time,
+            stim_block_time,
+            render_block_time,
            render_prep_time,
-            overall_time - step_time /*- stim_time*/ - block_time - render_prep_time,
+            overall_time - step_time - stim_block_time - render_block_time - render_prep_time,
        )
    }
 }
@ -94,6 +98,13 @@ impl SyncDiagnostics {
        ret
    }

+    /// record the duration spent blocking the simulation because the stimulus queue is full.
+    pub fn instrument_stimuli_blocked<R, F: FnOnce() -> R>(&self, f: F) -> R{
+        let (elapsed, ret) = Self::measure(f);
+        self.0.lock().unwrap().time_spent_blocked_on_stim += elapsed;
+        ret
+    }
+
    /// record the duration spent blocking the simulation because the render queue is full.
    pub fn instrument_render_blocked<R, F: FnOnce() -> R>(&self, f: F) -> R{
        let (elapsed, ret) = Self::measure(f);
--- a/crates/coremem/src/driver.rs
+++ b/crates/coremem/src/driver.rs
@ -59,7 +59,7 @@ impl<S: AbstractSim, Stim> Driver<S, Stim> {
                Arc::new(meas::Energy::world()),
                Arc::new(meas::Power::world()),
            ],
-            stimuli: StimAccess::new(stimuli),
+            stimuli: StimAccess::new(diag.clone(), stimuli),
            sim_end_time: None,
            diag,
            last_diag_time: Instant::now(),
@ -102,7 +102,7 @@ impl<S: AbstractSim, Stim> Driver<S, Stim> {
            render_pool: self.render_pool,
            render_channel: self.render_channel,
            measurements: self.measurements,
-            stimuli: StimAccess::new(self.stimuli.into_inner().append(s)),
+            stimuli: StimAccess::new(self.diag.clone(), self.stimuli.into_inner().append(s)),
            sim_end_time: self.sim_end_time,
            diag: self.diag,
            last_diag_time: self.last_diag_time,
@ -115,7 +115,7 @@ impl<S: AbstractSim, Stim> Driver<S, Stim> {
            render_pool: self.render_pool,
            render_channel: self.render_channel,
            measurements: self.measurements,
-            stimuli: StimAccess::new(stimuli),
+            stimuli: StimAccess::new(self.diag.clone(), stimuli),
            sim_end_time: self.sim_end_time,
            diag: self.diag,
            last_diag_time: self.last_diag_time,
@ -261,17 +261,18 @@ where
        while can_step < at_most && !self.renderer.any_work_for_frame(start_step + can_step as u64) {
            can_step += 1;
        }
+
+        let meta = self.state.meta();
+        let stim = self.stimuli.get_for(meta, start_step);
+        // prefetch the next stimulus, in the background.
+        self.stimuli.start_job(meta, start_step + can_step as u64);
+
        trace!("step begin");
        self.diag.instrument_step(can_step as u64, || {
-            let meta = self.state.meta();
-            let stim = self.stimuli.get_for(meta, start_step);
-
-            // prefetch the next stimulus, in the background.
-            self.stimuli.start_job(meta, start_step + can_step as u64);
-
            self.state.step_multiple(can_step, &stim);
        });
        trace!("step end");
+
        if self.last_diag_time.elapsed().as_secs_f64() >= 5.0 {
            self.last_diag_time = Instant::now();
            let step = self.state.step_no();
@ -438,6 +439,7 @@ pub type ModulatedStaticField<T> = ModulatedVectorField<DimSlice<Vec<Fields>>, T
 /// come back and re-request that time later, expecting that it's been evaluated in the background.
 struct StimAccess<T> {
    stim: Arc<Mutex<T>>,
+    diag: SyncDiagnostics,
    /// is the background thread doing work (or, has it completed work and placed it on the return
    /// queue)?
    /// A.K.A. "can i safely do a blocking recv on response_channel".
@ -451,9 +453,10 @@ struct StimAccess<T> {
 }

 impl<T> StimAccess<T> {
-    fn new(stim: T) -> Self {
+    fn new(diag: SyncDiagnostics, stim: T) -> Self {
        Self {
            stim: Arc::new(Mutex::new(stim)),
+            diag,
            outstanding: Cell::new(false),
            response_channel: sync_channel(0),
            worker: ThreadPool::new(1),
@ -475,7 +478,9 @@ impl<T> StimAccess<T> {
        }

        // block until job is complete and receive the result
-        let completed = self.response_channel.1.recv().unwrap();
+        let completed = self.diag.instrument_stimuli_blocked(|| {
+            self.response_channel.1.recv().unwrap()
+        });
        let (job_meta, job_step, rendered) = completed;
        self.outstanding.set(false);

@ -499,10 +504,12 @@ impl<T: DriverStimulus + Send + 'static> StimAccess<T> {
        assert!(!self.outstanding.get());
        self.outstanding.set(true);

+        let diag = self.diag.clone();
        let stim = self.stim.clone();
        let response_handle = self.response_channel.0.clone();
        self.worker.execute(move || {
-            let rendered = {
+            trace!("eval_stimulus begin");
+            let rendered = diag.instrument_stimuli(|| {
                let stim = stim.lock().unwrap();
                let opt = stim.optimized_for(meta, step);
                opt.as_ref().rendered(
@ -514,7 +521,8 @@ impl<T: DriverStimulus + Send + 'static> StimAccess<T> {
                ).into_owned()
                //^ this 'into_owned' ought to be a no-op.
                //^ it would only ever be borrowed if we accidentally called `rendered` twice.
-            };
+            });
+
            // NB: this is blocking.
            // it's important that we drop any locks before replying.
            response_handle.send((meta, step, rendered)).unwrap();