spirv: instrument the device read/write operations
This commit is contained in:
@@ -11,6 +11,8 @@ pub struct Diagnostics {
|
||||
time_rendering: Duration,
|
||||
time_blocked_on_stim: Duration,
|
||||
time_blocked_on_render: Duration,
|
||||
time_reading_device: Duration,
|
||||
time_writing_device: Duration,
|
||||
start_time: Instant,
|
||||
}
|
||||
|
||||
@@ -33,6 +35,8 @@ impl Diagnostics {
|
||||
time_rendering: Default::default(),
|
||||
time_blocked_on_stim: Default::default(),
|
||||
time_blocked_on_render: Default::default(),
|
||||
time_reading_device: Default::default(),
|
||||
time_writing_device: Default::default(),
|
||||
start_time: Instant::now(),
|
||||
}
|
||||
}
|
||||
@@ -49,7 +53,7 @@ impl Diagnostics {
|
||||
let other_time = overall_time - step_time - stim_block_time - render_block_time - render_prep_time;
|
||||
let fps = (self.frames_completed as f64) / overall_time;
|
||||
|
||||
format!("fps: {:6.2} (step: {:.1}s, [stim: {:.1}s render: {:.1}s], blocked: (stim: {:.1}s render: {:.1}s), render_prep: {:.1}s, other: {:.1}s)",
|
||||
let main_line = format!("fps: {:6.2} (step: {:.1}s, [stim: {:.1}s render: {:.1}s], blocked: (stim: {:.1}s render: {:.1}s), render_prep: {:.1}s, other: {:.1}s)",
|
||||
fps,
|
||||
step_time,
|
||||
stim_time,
|
||||
@@ -58,7 +62,15 @@ impl Diagnostics {
|
||||
render_block_time,
|
||||
render_prep_time,
|
||||
other_time,
|
||||
)
|
||||
);
|
||||
let device_write_time = self.time_writing_device.as_secs_f64();
|
||||
let device_read_time = self.time_reading_device.as_secs_f64();
|
||||
let sub_line = format!("gpu> write: {:.1}s read: {:.1}s",
|
||||
device_write_time,
|
||||
device_read_time,
|
||||
);
|
||||
|
||||
format!("{}\n {}", main_line, sub_line)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,5 +133,16 @@ impl SyncDiagnostics {
|
||||
ret
|
||||
}
|
||||
|
||||
/// Passes `f` to `Self::measure` and adds the elapsed time it reports to
/// `time_reading_device`, then returns the value `measure` produced for `f`.
///
/// NOTE(review): `Self::measure` is defined outside this view; presumably it
/// invokes `f` exactly once and returns `(elapsed, result)` — confirm.
///
/// # Panics
/// Panics if `self.0.lock()` yields an `Err` (see the `unwrap()` below).
/// Presumably `self.0` is a `std::sync::Mutex`, in which case that means the
/// lock was poisoned — TODO confirm the type of `self.0`.
pub fn instrument_read_device<R, F: FnOnce() -> R>(&self, f: F) -> R {
|
||||
let (elapsed, ret) = Self::measure(f);
|
||||
// The lock is taken only after `measure` has returned, and the guard is a
// temporary of this single statement, so it is not held while `ret` is returned.
self.0.lock().unwrap().time_reading_device += elapsed;
|
||||
ret
|
||||
}
|
||||
/// Passes `f` to `Self::measure` and adds the elapsed time it reports to
/// `time_writing_device`, then returns the value `measure` produced for `f`.
///
/// NOTE(review): `Self::measure` is defined outside this view; presumably it
/// invokes `f` exactly once and returns `(elapsed, result)` — confirm.
///
/// # Panics
/// Panics if `self.0.lock()` yields an `Err` (see the `unwrap()` below).
/// Presumably `self.0` is a `std::sync::Mutex`, in which case that means the
/// lock was poisoned — TODO confirm the type of `self.0`.
pub fn instrument_write_device<R, F: FnOnce() -> R>(&self, f: F) -> R {
|
||||
let (elapsed, ret) = Self::measure(f);
|
||||
// The lock is taken only after `measure` has returned, and the guard is a
// temporary of this single statement, so it is not held while `ret` is returned.
self.0.lock().unwrap().time_writing_device += elapsed;
|
||||
ret
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@@ -1,3 +1,5 @@
|
||||
use crate::diagnostics::SyncDiagnostics;
|
||||
|
||||
use coremem_cross::mat::Material;
|
||||
use coremem_cross::real::Real;
|
||||
use coremem_cross::step::{SimMeta, StepEContext, StepHContext};
|
||||
@@ -11,6 +13,7 @@ pub struct CpuBackend;
|
||||
impl<R: Real, M: Material<R>> SimBackend<R, M> for CpuBackend {
|
||||
fn step_n(
|
||||
&mut self,
|
||||
_diag: &SyncDiagnostics,
|
||||
meta: SimMeta<R>,
|
||||
mat: &[M],
|
||||
stim_e: &[Vec3<R>],
|
||||
|
@@ -5,6 +5,8 @@ use std::num::NonZeroU64;
|
||||
use wgpu;
|
||||
use wgpu::util::DeviceExt as _;
|
||||
|
||||
use crate::diagnostics::SyncDiagnostics;
|
||||
|
||||
use coremem_cross::vec::{Vec3, Vec3u};
|
||||
use coremem_cross::step::SimMeta;
|
||||
|
||||
@@ -55,6 +57,7 @@ impl WgpuHandles {
|
||||
impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBackend {
|
||||
fn step_n(
|
||||
&mut self,
|
||||
diag: &SyncDiagnostics,
|
||||
meta: SimMeta<R>,
|
||||
mat: &[M],
|
||||
stim_cpu_e: &[Vec3<R>],
|
||||
@@ -228,15 +231,17 @@ impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBacke
|
||||
field_bytes as u64,
|
||||
);
|
||||
|
||||
queue.submit(Some(encoder.finish()));
|
||||
diag.instrument_write_device(move || {
|
||||
queue.submit(Some(encoder.finish()));
|
||||
});
|
||||
|
||||
let e_readback_slice = e_readback_buffer.slice(..);
|
||||
let e_readback_future = e_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
|
||||
e.copy_from_slice(unsafe {
|
||||
from_bytes(e_readback_slice.get_mapped_range().as_ref())
|
||||
});
|
||||
e_readback_buffer.unmap();
|
||||
e.copy_from_slice(unsafe {
|
||||
from_bytes(e_readback_slice.get_mapped_range().as_ref())
|
||||
});
|
||||
e_readback_buffer.unmap();
|
||||
});
|
||||
|
||||
let h_readback_slice = h_readback_buffer.slice(..);
|
||||
let h_readback_future = h_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
|
||||
@@ -254,11 +259,15 @@ impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBacke
|
||||
m_readback_buffer.unmap();
|
||||
});
|
||||
|
||||
// optimization note: it may be possible to use `WaitForSubmission`
|
||||
// and copy data to/from even as the GPU begins executing the next job.
|
||||
device.poll(wgpu::Maintain::Wait);
|
||||
|
||||
futures::executor::block_on(futures::future::join(
|
||||
e_readback_future, futures::future::join(
|
||||
h_readback_future, m_readback_future)));
|
||||
diag.instrument_read_device(move || {
|
||||
futures::executor::block_on(futures::future::join(
|
||||
e_readback_future, futures::future::join(
|
||||
h_readback_future, m_readback_future)));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -24,6 +24,7 @@ pub use gpu::WgpuBackend;
|
||||
pub trait SimBackend<R, M> {
|
||||
fn step_n(
|
||||
&mut self,
|
||||
diag: &SyncDiagnostics,
|
||||
meta: SimMeta<R>,
|
||||
mat: &[M],
|
||||
stim_e: &[Vec3<R>],
|
||||
@@ -101,6 +102,7 @@ where
|
||||
fn step_multiple<S: Stimulus>(&mut self, num_steps: u32, stim: &S) {
|
||||
let (stim_e, stim_h) = self.eval_stimulus(stim);
|
||||
self.backend.step_n(
|
||||
&self.diag,
|
||||
self.meta,
|
||||
self.mat.as_slice(),
|
||||
&*stim_e,
|
||||
@@ -226,8 +228,7 @@ where
|
||||
fn eval_stimulus<'a, S: Stimulus>(&self, stim: &'a S)
|
||||
-> (&'a [Vec3<R>], &'a [Vec3<R>])
|
||||
{
|
||||
trace!("eval_stimulus begin");
|
||||
let (e, h) = self.diag.instrument_stimuli(|| {
|
||||
let (e, h) = self.diag.instrument_stimuli_blocked(|| {
|
||||
let dim = self.size();
|
||||
let dim_len = dim.product_sum_usize();
|
||||
let feature_size = self.feature_size();
|
||||
@@ -242,7 +243,6 @@ where
|
||||
// TODO: find a way to remove this
|
||||
unsafe { std::mem::transmute((e, h)) }
|
||||
});
|
||||
trace!("eval_stimulus end");
|
||||
(e, h)
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user