try printing out gpu timestamps
This seems to just print all zeros on my laptop; maybe it will work better on a newer GPU. This follows the example in Embark's rust-gpu: runners/wgpu/src/compute.rs
This commit is contained in:
parent
6a7a6bc170
commit
1387506511
|
@ -83,6 +83,15 @@ impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBacke
|
|||
let step_bind_group_layout = &handles.step_bind_group_layout;
|
||||
let step_e_pipeline = &handles.step_e_pipeline;
|
||||
let step_h_pipeline = &handles.step_h_pipeline;
|
||||
let timestamp_buffer = device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: Some("timestamps"),
|
||||
// each timestamp is 8 bytes, and we do 4 per step
|
||||
size: 8 * 4 * num_steps as u64,
|
||||
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
|
||||
mapped_at_creation: true,
|
||||
});
|
||||
timestamp_buffer.unmap();
|
||||
|
||||
let sim_meta_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||
label: Some("gpu-side simulation metadata"),
|
||||
contents: to_bytes(&[meta][..]),
|
||||
|
@ -187,24 +196,34 @@ impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBacke
|
|||
],
|
||||
});
|
||||
|
||||
let queries = device.create_query_set(&wgpu::QuerySetDescriptor {
|
||||
label: None,
|
||||
count: 4 * num_steps,
|
||||
ty: wgpu::QueryType::Timestamp,
|
||||
});
|
||||
|
||||
let workgroups = ((dim.x()+3) / 4, (dim.y()+3) / 4, (dim.z()+3) / 4);
|
||||
|
||||
let mut encoder =
|
||||
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
|
||||
|
||||
for _ in 0..num_steps {
|
||||
for step in 0..num_steps {
|
||||
{
|
||||
let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
|
||||
cpass.set_bind_group(0, &bind_group, &[]);
|
||||
cpass.set_pipeline(&step_e_pipeline);
|
||||
cpass.write_timestamp(&queries, 4*step);
|
||||
cpass.dispatch(workgroups.0, workgroups.1, workgroups.2);
|
||||
cpass.write_timestamp(&queries, 4*step + 1);
|
||||
}
|
||||
|
||||
{
|
||||
let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
|
||||
cpass.set_bind_group(0, &bind_group, &[]);
|
||||
cpass.set_pipeline(&step_h_pipeline);
|
||||
cpass.write_timestamp(&queries, 4*step + 2);
|
||||
cpass.dispatch(workgroups.0, workgroups.1, workgroups.2);
|
||||
cpass.write_timestamp(&queries, 4*step + 3);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -232,6 +251,8 @@ impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBacke
|
|||
field_bytes as u64,
|
||||
);
|
||||
|
||||
encoder.resolve_query_set(&queries, 0..4*num_steps, &timestamp_buffer, 0);
|
||||
|
||||
diag.instrument_write_device(move || {
|
||||
queue.submit(Some(encoder.finish()));
|
||||
});
|
||||
|
@ -260,6 +281,20 @@ impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBacke
|
|||
m_readback_buffer.unmap();
|
||||
});
|
||||
|
||||
|
||||
// let timestamp_period = queue.get_timestamp_period();
|
||||
let timestamp_readback_slice = timestamp_buffer.slice(..);
|
||||
let timestamp_readback_future = timestamp_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
|
||||
{
|
||||
let mapped = timestamp_readback_slice.get_mapped_range();
|
||||
let timings: &[u64] = unsafe {
|
||||
from_bytes(mapped.as_ref())
|
||||
};
|
||||
println!("timings: {:?}", timings);
|
||||
}
|
||||
timestamp_buffer.unmap();
|
||||
});
|
||||
|
||||
// optimization note: it may be possible to use `WaitForSubmission`
|
||||
// and copy data to/from even as the GPU begins executing the next job.
|
||||
device.poll(wgpu::Maintain::Wait);
|
||||
|
@ -267,7 +302,11 @@ impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBacke
|
|||
diag.instrument_read_device(move || {
|
||||
futures::executor::block_on(futures::future::join(
|
||||
e_readback_future, futures::future::join(
|
||||
h_readback_future, m_readback_future)));
|
||||
h_readback_future, futures::future::join(
|
||||
m_readback_future, timestamp_readback_future
|
||||
)
|
||||
)
|
||||
));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -327,7 +366,11 @@ async fn open_device(max_buf_size: u64) -> (wgpu::Device, wgpu::Queue) {
|
|||
.request_device(
|
||||
&wgpu::DeviceDescriptor {
|
||||
label: None,
|
||||
features: wgpu::Features::SPIRV_SHADER_PASSTHROUGH,
|
||||
features: (
|
||||
wgpu::Features::empty()
|
||||
.union(wgpu::Features::SPIRV_SHADER_PASSTHROUGH)
|
||||
.union(wgpu::Features::TIMESTAMP_QUERY)
|
||||
),
|
||||
limits,
|
||||
},
|
||||
None,
|
||||
|
|
Loading…
Reference in New Issue