spirv: instrument the device read/write operations

This commit is contained in:
2022-08-21 18:51:51 -07:00
parent 6c9a6e1ffa
commit 98d6a5b34f
4 changed files with 48 additions and 13 deletions

View File

@@ -11,6 +11,8 @@ pub struct Diagnostics {
time_rendering: Duration,
time_blocked_on_stim: Duration,
time_blocked_on_render: Duration,
time_reading_device: Duration,
time_writing_device: Duration,
start_time: Instant,
}
@@ -33,6 +35,8 @@ impl Diagnostics {
time_rendering: Default::default(),
time_blocked_on_stim: Default::default(),
time_blocked_on_render: Default::default(),
time_reading_device: Default::default(),
time_writing_device: Default::default(),
start_time: Instant::now(),
}
}
@@ -49,7 +53,7 @@ impl Diagnostics {
let other_time = overall_time - step_time - stim_block_time - render_block_time - render_prep_time;
let fps = (self.frames_completed as f64) / overall_time;
format!("fps: {:6.2} (step: {:.1}s, [stim: {:.1}s render: {:.1}s], blocked: (stim: {:.1}s render: {:.1}s), render_prep: {:.1}s, other: {:.1}s)",
let main_line = format!("fps: {:6.2} (step: {:.1}s, [stim: {:.1}s render: {:.1}s], blocked: (stim: {:.1}s render: {:.1}s), render_prep: {:.1}s, other: {:.1}s)",
fps,
step_time,
stim_time,
@@ -58,7 +62,15 @@ impl Diagnostics {
render_block_time,
render_prep_time,
other_time,
)
);
let device_write_time = self.time_writing_device.as_secs_f64();
let device_read_time = self.time_reading_device.as_secs_f64();
let sub_line = format!("gpu> write: {:.1}s read: {:.1}s",
device_write_time,
device_read_time,
);
format!("{}\n {}", main_line, sub_line)
}
}
@@ -121,5 +133,16 @@ impl SyncDiagnostics {
ret
}
/// Runs `f` through `Self::measure` and adds the elapsed value it reports to
/// the `time_reading_device` counter, then hands back whatever `f` produced.
///
/// NOTE(review): `self.0.lock().unwrap()` will panic if `lock()` returns an
/// error; presumably that is mutex poisoning — confirm against the field type.
pub fn instrument_read_device<R, F>(&self, f: F) -> R
where
    F: FnOnce() -> R,
{
    let (duration, output) = Self::measure(f);
    {
        // Keep the guard scoped to the counter update only.
        let mut diag = self.0.lock().unwrap();
        diag.time_reading_device += duration;
    }
    output
}
/// Runs `f` through `Self::measure` and adds the elapsed value it reports to
/// the `time_writing_device` counter, then hands back whatever `f` produced.
///
/// NOTE(review): `self.0.lock().unwrap()` will panic if `lock()` returns an
/// error; presumably that is mutex poisoning — confirm against the field type.
pub fn instrument_write_device<R, F>(&self, f: F) -> R
where
    F: FnOnce() -> R,
{
    let (duration, output) = Self::measure(f);
    {
        // Keep the guard scoped to the counter update only.
        let mut diag = self.0.lock().unwrap();
        diag.time_writing_device += duration;
    }
    output
}
}

View File

@@ -1,3 +1,5 @@
use crate::diagnostics::SyncDiagnostics;
use coremem_cross::mat::Material;
use coremem_cross::real::Real;
use coremem_cross::step::{SimMeta, StepEContext, StepHContext};
@@ -11,6 +13,7 @@ pub struct CpuBackend;
impl<R: Real, M: Material<R>> SimBackend<R, M> for CpuBackend {
fn step_n(
&mut self,
_diag: &SyncDiagnostics,
meta: SimMeta<R>,
mat: &[M],
stim_e: &[Vec3<R>],

View File

@@ -5,6 +5,8 @@ use std::num::NonZeroU64;
use wgpu;
use wgpu::util::DeviceExt as _;
use crate::diagnostics::SyncDiagnostics;
use coremem_cross::vec::{Vec3, Vec3u};
use coremem_cross::step::SimMeta;
@@ -55,6 +57,7 @@ impl WgpuHandles {
impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBackend {
fn step_n(
&mut self,
diag: &SyncDiagnostics,
meta: SimMeta<R>,
mat: &[M],
stim_cpu_e: &[Vec3<R>],
@@ -228,15 +231,17 @@ impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBacke
field_bytes as u64,
);
queue.submit(Some(encoder.finish()));
diag.instrument_write_device(move || {
queue.submit(Some(encoder.finish()));
});
let e_readback_slice = e_readback_buffer.slice(..);
let e_readback_future = e_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
e.copy_from_slice(unsafe {
from_bytes(e_readback_slice.get_mapped_range().as_ref())
});
e_readback_buffer.unmap();
e.copy_from_slice(unsafe {
from_bytes(e_readback_slice.get_mapped_range().as_ref())
});
e_readback_buffer.unmap();
});
let h_readback_slice = h_readback_buffer.slice(..);
let h_readback_future = h_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
@@ -254,11 +259,15 @@ impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBacke
m_readback_buffer.unmap();
});
// optimization note: it may be possible to use `WaitForSubmission`
// and copy data to/from even as the GPU begins executing the next job.
device.poll(wgpu::Maintain::Wait);
futures::executor::block_on(futures::future::join(
e_readback_future, futures::future::join(
h_readback_future, m_readback_future)));
diag.instrument_read_device(move || {
futures::executor::block_on(futures::future::join(
e_readback_future, futures::future::join(
h_readback_future, m_readback_future)));
});
}
}

View File

@@ -24,6 +24,7 @@ pub use gpu::WgpuBackend;
pub trait SimBackend<R, M> {
fn step_n(
&mut self,
diag: &SyncDiagnostics,
meta: SimMeta<R>,
mat: &[M],
stim_e: &[Vec3<R>],
@@ -101,6 +102,7 @@ where
fn step_multiple<S: Stimulus>(&mut self, num_steps: u32, stim: &S) {
let (stim_e, stim_h) = self.eval_stimulus(stim);
self.backend.step_n(
&self.diag,
self.meta,
self.mat.as_slice(),
&*stim_e,
@@ -226,8 +228,7 @@ where
fn eval_stimulus<'a, S: Stimulus>(&self, stim: &'a S)
-> (&'a [Vec3<R>], &'a [Vec3<R>])
{
trace!("eval_stimulus begin");
let (e, h) = self.diag.instrument_stimuli(|| {
let (e, h) = self.diag.instrument_stimuli_blocked(|| {
let dim = self.size();
let dim_len = dim.product_sum_usize();
let feature_size = self.feature_size();
@@ -242,7 +243,6 @@ where
// TODO: find a way to remove this
unsafe { std::mem::transmute((e, h)) }
});
trace!("eval_stimulus end");
(e, h)
}
}