Add a test which runs a Collatz compute shader on the GPU with rust-gpu

This commit is contained in:
2021-07-25 16:44:53 -07:00
parent eecf848875
commit 2f043b92e6
6 changed files with 302 additions and 0 deletions

View File

@@ -18,6 +18,7 @@ enum_dispatch = "0.3"
env_logger = "0.7"
float_eq = "0.5"
font8x8 = "0.2"
futures = "0.3"
image = "0.23"
imageproc = "0.21"
indexmap = "1.6"
@@ -42,6 +43,12 @@ y4m = "0.7"
# erased-serde = "0.3"
# serde_cbor = "0.11"
wgpu = "0.9"
spirv-builder = { git = "https://github.com/EmbarkStudios/rust-gpu", features=["use-compiled-tools"] }
spirv-std = { git = "https://github.com/EmbarkStudios/rust-gpu" }
spirv-std-macros = { git = "https://github.com/EmbarkStudios/rust-gpu" }
[dev-dependencies]
criterion = "0.3"

224
src/gpusim.rs Normal file
View File

@@ -0,0 +1,224 @@
use futures::future::join;
use spirv_builder::{Capability, MetadataPrintout, SpirvBuilder};
use std::borrow::Cow;
use std::num::NonZeroU64;
use std::path::PathBuf;
use std::time::Duration;
use wgpu;
use wgpu::util::DeviceExt as _;
/// Compile the `gpusim_impl` crate to SPIR-V and wrap the resulting binary in a
/// `wgpu::ShaderModuleDescriptor` ready for `Device::create_shader_module`.
///
/// Panics if the SPIR-V build fails or the produced module cannot be read.
fn get_shader() -> wgpu::ShaderModuleDescriptor<'static> {
    // Based on rust-gpu/examples/runners/wgpu/src/lib.rs:maybe_watch.
    //
    // Hack note (kept from upstream): spirv_builder builds into a custom directory if it
    // thinks it is running under cargo (to avoid a deadlock) and into the default target
    // directory otherwise. Packages like `proc-macro2` are configured differently here
    // than when building rustc_codegen_spirv, so we *want* the separate target directory
    // to avoid rebuilding half the crate graph every run. Pretending to run under cargo
    // would look like:
    // std::env::set_var("OUT_DIR", env!("OUT_DIR"));
    // std::env::set_var("PROFILE", env!("PROFILE"));
    let shader_crate = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("src")
        .join("gpusim_impl");
    // Int8 capability is required by the shader crate's target features.
    let compile_result = SpirvBuilder::new(shader_crate, "spirv-unknown-vulkan1.1")
        .print_metadata(MetadataPrintout::None)
        .capability(Capability::Int8)
        .build()
        .unwrap();
    let spv_bytes = std::fs::read(compile_result.module.unwrap_single()).unwrap();
    // `make_spirv` borrows from `spv_bytes`; convert each variant to an owned Cow so the
    // returned descriptor can be 'static.
    let source = match wgpu::util::make_spirv(&spv_bytes) {
        wgpu::ShaderSource::Wgsl(text) => {
            wgpu::ShaderSource::Wgsl(Cow::Owned(text.into_owned()))
        }
        wgpu::ShaderSource::SpirV(words) => {
            wgpu::ShaderSource::SpirV(Cow::Owned(words.into_owned()))
        }
    };
    wgpu::ShaderModuleDescriptor {
        label: None,
        source,
        flags: wgpu::ShaderFlags::default(),
    }
}
/// Run the compute shader described by `shader_binary` over the integers `1..2^20`
/// and read the results back to the CPU.
///
/// Returns the transformed buffer contents and the GPU-side execution time of the
/// compute pass (the difference between two pipeline timestamps, scaled by the
/// queue's timestamp period).
///
/// Panics (via `expect`/`unwrap`) if no suitable adapter or device is available,
/// or if a buffer mapping fails.
async fn run_shader(shader_binary: wgpu::ShaderModuleDescriptor<'static>) -> (Vec<u32>, Duration) {
    // based on rust-gpu/examples/runners/wgpu/src/compute.rs:start_internal
    let instance = wgpu::Instance::new(wgpu::BackendBit::PRIMARY);
    let adapter = instance
        .request_adapter(&wgpu::RequestAdapterOptions {
            power_preference: wgpu::PowerPreference::default(),
            compatible_surface: None,
        })
        .await
        .expect("Failed to find an appropriate adapter");
    // TIMESTAMP_QUERY is needed for the write_timestamp calls in the compute pass below.
    let (device, queue) = adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::TIMESTAMP_QUERY,
                limits: wgpu::Limits::default(),
            },
            None,
        )
        .await
        .expect("Failed to create device");
    // Instance and adapter are no longer needed once a device + queue exist.
    drop(instance);
    drop(adapter);
    // Nanoseconds per timestamp tick; used at the end to turn a tick delta into a Duration.
    let timestamp_period = queue.get_timestamp_period();
    // Load the shaders from disk
    let module = device.create_shader_module(&shader_binary);
    // Input data: every u32 in 1..2^20, serialized to bytes for the GPU buffer.
    let top = 2u32.pow(20);
    let src_range = 1..top;
    let src = src_range
        .clone()
        // Not sure which endianness is correct to use here
        .map(u32::to_ne_bytes)
        .flat_map(core::array::IntoIter::new)
        .collect::<Vec<_>>();
    // One read-write storage buffer at binding 0, matching main_cs's binding attributes.
    let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
        label: None,
        entries: &[
            // XXX - some graphics cards do not support empty bind layout groups, so
            // create a dummy entry.
            wgpu::BindGroupLayoutEntry {
                binding: 0,
                count: None,
                visibility: wgpu::ShaderStage::COMPUTE,
                ty: wgpu::BindingType::Buffer {
                    has_dynamic_offset: false,
                    min_binding_size: Some(NonZeroU64::new(1).unwrap()),
                    ty: wgpu::BufferBindingType::Storage { read_only: false },
                },
            },
        ],
    });
    let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
        label: None,
        bind_group_layouts: &[&bind_group_layout],
        push_constant_ranges: &[],
    });
    // "main_cs" is the entry point exported by the gpusim_impl shader crate.
    let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
        label: None,
        layout: Some(&pipeline_layout),
        module: &module,
        entry_point: "main_cs",
    });
    // CPU-visible destination for the shader's output.
    let readback_buffer = device.create_buffer(&wgpu::BufferDescriptor {
        label: None,
        size: src.len() as wgpu::BufferAddress,
        // Can be read to the CPU, and can be copied from the shader's storage buffer
        usage: wgpu::BufferUsage::MAP_READ | wgpu::BufferUsage::COPY_DST,
        mapped_at_creation: false,
    });
    // GPU-side buffer the shader reads and writes in place; initialized with `src`.
    let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: Some("Collatz Conjecture Input"),
        contents: &src,
        usage: wgpu::BufferUsage::STORAGE
            | wgpu::BufferUsage::COPY_DST
            | wgpu::BufferUsage::COPY_SRC,
    });
    // Receives the two resolved 8-byte timestamps (16 bytes total).
    // NOTE(review): created with `mapped_at_creation: true` and immediately unmapped —
    // the initial mapping appears unused; confirm whether `false` would suffice.
    let timestamp_buffer = device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("Timestamps buffer"),
        size: 16,
        usage: wgpu::BufferUsage::MAP_READ | wgpu::BufferUsage::COPY_DST,
        mapped_at_creation: true,
    });
    timestamp_buffer.unmap();
    let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &bind_group_layout,
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: storage_buffer.as_entire_binding(),
        }],
    });
    // Two timestamp queries: one before and one after the dispatch.
    let queries = device.create_query_set(&wgpu::QuerySetDescriptor {
        count: 2,
        ty: wgpu::QueryType::Timestamp,
    });
    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    {
        let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
        cpass.set_bind_group(0, &bind_group, &[]);
        cpass.set_pipeline(&compute_pipeline);
        cpass.write_timestamp(&queries, 0);
        // One 64-thread workgroup per 64 inputs, matching main_cs's threads(64).
        // NOTE(review): integer division truncates — src_range.len() is 2^20 - 1, so the
        // final 63 elements are never dispatched; confirm whether that is intended.
        cpass.dispatch(src_range.len() as u32 / 64, 1, 1);
        cpass.write_timestamp(&queries, 1);
    }
    // Copy the shader's in-place results into the CPU-mappable readback buffer.
    encoder.copy_buffer_to_buffer(
        &storage_buffer,
        0,
        &readback_buffer,
        0,
        src.len() as wgpu::BufferAddress,
    );
    // Write both timestamp query results into timestamp_buffer starting at offset 0.
    encoder.resolve_query_set(&queries, 0..2, &timestamp_buffer, 0);
    queue.submit(Some(encoder.finish()));
    // Mapping is asynchronous; poll(Maintain::Wait) drives the device so that both
    // map futures can complete before they are awaited.
    let buffer_slice = readback_buffer.slice(..);
    let timestamp_slice = timestamp_buffer.slice(..);
    let timestamp_future = timestamp_slice.map_async(wgpu::MapMode::Read);
    let buffer_future = buffer_slice.map_async(wgpu::MapMode::Read);
    device.poll(wgpu::Maintain::Wait);
    let (buf_res, ts_res) = join(buffer_future, timestamp_future).await;
    buf_res.unwrap();
    ts_res.unwrap();
    let data = buffer_slice.get_mapped_range();
    let timing_data = timestamp_slice.get_mapped_range();
    // Reassemble the byte buffers into native-endian u32 results and u64 timestamps.
    // NOTE(review): `try_into` here relies on `TryInto` being in scope (prelude in
    // edition 2021) — confirm against this crate's edition.
    let result = data
        .chunks_exact(4)
        .map(|b| u32::from_ne_bytes(b.try_into().unwrap()))
        .collect::<Vec<_>>();
    let timings = timing_data
        .chunks_exact(8)
        .map(|b| u64::from_ne_bytes(b.try_into().unwrap()))
        .collect::<Vec<_>>();
    // The mapped views must be dropped before their buffers are unmapped.
    drop(data);
    readback_buffer.unmap();
    drop(timing_data);
    timestamp_buffer.unmap();
    // Tick delta * nanoseconds-per-tick = GPU execution time of the compute pass.
    let duration = Duration::from_nanos(
        ((timings[1] - timings[0]) as f64 * f64::from(timestamp_period)) as u64
    );
    (result, duration)
}
/// Build the shader, run it on the first available adapter, then print every output
/// value followed by the measured GPU execution time.
fn main() {
    let shader = get_shader();
    let (outputs, elapsed) = futures::executor::block_on(run_shader(shader));
    for value in outputs {
        println!("{}", value);
    }
    println!("Took: {:?}", elapsed);
    // Uncomment to force the smoke test to fail and display the timing:
    //assert!(false, "Took: {:?}", elapsed);
}
#[cfg(test)]
mod test {
    // Smoke test: exercises the whole build-shader/run-shader path via `main`.
    // NOTE(review): this requires a usable GPU adapter and the rust-gpu build
    // toolchain at test time (request_adapter/SpirvBuilder panic otherwise), so it
    // is an integration check rather than a unit test.
    #[test]
    fn smoke() {
        super::main();
    }
}

View File

@@ -0,0 +1,10 @@
[package]
name = "gpusim_impl"
version = "0.1.0"
edition = "2018"
[dependencies]
spirv-std = { git = "https://github.com/EmbarkStudios/rust-gpu", features = ["glam"] }
[lib]
crate-type = ["dylib", "lib"]

View File

@@ -0,0 +1,56 @@
#![cfg_attr(
target_arch = "spirv",
feature(register_attr),
register_attr(spirv),
no_std
)]
// HACK(eddyb) can't easily see warnings otherwise from `spirv-builder` builds.
#![deny(warnings)]
extern crate spirv_std;
use glam::UVec3;
use spirv_std::glam;
#[cfg(not(target_arch = "spirv"))]
use spirv_std::macros::spirv;
// Adapted from the wgpu hello-compute example.
//
// Returns the number of Collatz steps needed to reach 1 from `n`, or `None` when the
// count is undefined: `n == 0` never reaches 1, and for odd `n >= 0x5555_5555` the
// `3n + 1` step would overflow a `u32`.
pub fn collatz(mut n: u32) -> Option<u32> {
    if n == 0 {
        return None;
    }
    let mut steps = 0;
    while n != 1 {
        if n % 2 == 0 {
            n /= 2;
        } else {
            // Bail out rather than overflow: 3 * 0x5555_5555 + 1 > u32::MAX.
            // TODO: Use n.checked_mul(3)?.checked_add(1)? when/if checked add/mul can work
            if n >= 0x5555_5555 {
                return None;
            }
            n = n * 3 + 1;
        }
        steps += 1;
    }
    Some(steps)
}
// Compute shader entry point: each invocation replaces one element of the bound
// storage buffer with its Collatz step count (or u32::MAX when undefined/overflowing).
// LocalSize/numthreads of (x = 64, y = 1, z = 1)
#[spirv(compute(threads(64)))]
pub fn main_cs(
    #[spirv(global_invocation_id)] id: UVec3,
    #[spirv(storage_buffer, descriptor_set = 0, binding = 0)] prime_indices: &mut [u32],
) {
    // One element per invocation, indexed by the global x coordinate.
    // NOTE(review): no bounds check — assumes the dispatch size never exceeds the
    // buffer length; confirm against the host-side dispatch.
    let index = id.x as usize;
    prime_indices[index] = unwrap_or_max(collatz(prime_indices[index]));
}
// Work around https://github.com/EmbarkStudios/rust-gpu/issues/677: spell out the
// unwrap-with-default as an explicit match rather than an Option combinator.
fn unwrap_or_max(option: Option<u32>) -> u32 {
    match option {
        None => u32::MAX,
        Some(value) => value,
    }
}

View File

@@ -0,0 +1,3 @@
fn main() {
println!("Hello, world!");
}

View File

@@ -17,6 +17,8 @@ pub mod render;
pub mod sim;
pub mod stim;
mod gpusim;
pub use driver::*;
pub use mat::*;
pub use sim::*;