diff --git a/Cargo.toml b/Cargo.toml
index eecf24c..87f632f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@ enum_dispatch = "0.3"
 env_logger = "0.7"
 float_eq = "0.5"
 font8x8 = "0.2"
+futures = "0.3"
 image = "0.23"
 imageproc = "0.21"
 indexmap = "1.6"
@@ -42,6 +43,12 @@ y4m = "0.7"
 
 # erased-serde = "0.3"
 # serde_cbor = "0.11"
+wgpu = "0.9"
+spirv-builder = { git = "https://github.com/EmbarkStudios/rust-gpu", features=["use-compiled-tools"] }
+spirv-std = { git = "https://github.com/EmbarkStudios/rust-gpu" }
+spirv-std-macros = { git = "https://github.com/EmbarkStudios/rust-gpu" }
+
+
 
 [dev-dependencies]
 criterion = "0.3"
diff --git a/src/gpusim.rs b/src/gpusim.rs
new file mode 100644
index 0000000..9cce953
--- /dev/null
+++ b/src/gpusim.rs
@@ -0,0 +1,246 @@
+use futures::future::join;
+use spirv_builder::{Capability, MetadataPrintout, SpirvBuilder};
+use std::borrow::Cow;
+use std::convert::TryInto;
+use std::num::NonZeroU64;
+use std::path::PathBuf;
+use std::time::Duration;
+use wgpu;
+use wgpu::util::DeviceExt as _;
+
+/// Compiles the `src/gpusim_impl` crate to SPIR-V and returns it as an owned shader descriptor.
+///
+/// # Panics
+///
+/// Panics if the SPIR-V build fails or the compiled module file cannot be read.
+fn get_shader() -> wgpu::ShaderModuleDescriptor<'static> {
+    // based on rust-gpu/examples/runners/wgpu/src/lib.rs:maybe_watch
+    // Hack: spirv_builder builds into a custom directory if running under cargo, to not
+    // deadlock, and the default target directory if not. However, packages like `proc-macro2`
+    // have different configurations when being built here vs. when building
+    // rustc_codegen_spirv normally, so we *want* to build into a separate target directory, to
+    // not have to rebuild half the crate graph every time we run. So, pretend we're running
+    // under cargo by setting these environment variables.
+    // std::env::set_var("OUT_DIR", env!("OUT_DIR"));
+    // std::env::set_var("PROFILE", env!("PROFILE"));
+    let manifest_dir = env!("CARGO_MANIFEST_DIR");
+    let crate_path = [manifest_dir, "src", "gpusim_impl"]
+        .iter()
+        .copied()
+        .collect::<PathBuf>();
+    let builder = SpirvBuilder::new(crate_path, "spirv-unknown-vulkan1.1")
+        .print_metadata(MetadataPrintout::None)
+        .capability(Capability::Int8);
+    let initial_result = builder.build().unwrap();
+
+    let module_path = initial_result.module.unwrap_single();
+    let data = std::fs::read(module_path).unwrap();
+
+    // `make_spirv` borrows from `data`; copy into an owned `Cow` so the returned descriptor
+    // can be `'static`.
+    let spirv = match wgpu::util::make_spirv(&data) {
+        wgpu::ShaderSource::Wgsl(cow) => {
+            wgpu::ShaderSource::Wgsl(Cow::Owned(cow.into_owned()))
+        }
+        wgpu::ShaderSource::SpirV(cow) => {
+            wgpu::ShaderSource::SpirV(Cow::Owned(cow.into_owned()))
+        }
+    };
+
+    wgpu::ShaderModuleDescriptor {
+        label: None,
+        source: spirv,
+        flags: wgpu::ShaderFlags::default(),
+    }
+}
+
+/// Runs the `main_cs` entry point of `shader_binary` over the integers `1..2^20`.
+///
+/// Returns the storage buffer contents read back after the dispatch (one `u32` per input) and
+/// the GPU time elapsed between the two timestamp queries written around the dispatch.
+///
+/// # Panics
+///
+/// Panics if no adapter is found, if the device cannot be created with `TIMESTAMP_QUERY`, or if
+/// mapping either readback buffer fails.
+async fn run_shader(shader_binary: wgpu::ShaderModuleDescriptor<'static>) -> (Vec<u32>, Duration) {
+    // based on rust-gpu/examples/runners/wgpu/src/compute.rs:start_internal
+    let instance = wgpu::Instance::new(wgpu::BackendBit::PRIMARY);
+    let adapter = instance
+        .request_adapter(&wgpu::RequestAdapterOptions {
+            power_preference: wgpu::PowerPreference::default(),
+            compatible_surface: None,
+        })
+        .await
+        .expect("Failed to find an appropriate adapter");
+
+    let (device, queue) = adapter
+        .request_device(
+            &wgpu::DeviceDescriptor {
+                label: None,
+                features: wgpu::Features::TIMESTAMP_QUERY,
+                limits: wgpu::Limits::default(),
+            },
+            None,
+        )
+        .await
+        .expect("Failed to create device");
+    drop(instance);
+    drop(adapter);
+
+    let timestamp_period = queue.get_timestamp_period();
+
+    // Load the shaders from disk
+    let module = device.create_shader_module(&shader_binary);
+
+    let top = 2u32.pow(20);
+    let src_range = 1..top;
+
+    let src = src_range
+        .clone()
+        // Not sure which endianness is correct to use here
+        .map(u32::to_ne_bytes)
+        .flat_map(core::array::IntoIter::new)
+        .collect::<Vec<_>>();
+
+    let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+        label: None,
+        entries: &[
+            // XXX - some graphics cards do not support empty bind layout groups, so
+            // create a dummy entry.
+            wgpu::BindGroupLayoutEntry {
+                binding: 0,
+                count: None,
+                visibility: wgpu::ShaderStage::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    has_dynamic_offset: false,
+                    min_binding_size: Some(NonZeroU64::new(1).unwrap()),
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                },
+            },
+        ],
+    });
+
+    let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+        label: None,
+        bind_group_layouts: &[&bind_group_layout],
+        push_constant_ranges: &[],
+    });
+
+    let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+        label: None,
+        layout: Some(&pipeline_layout),
+        module: &module,
+        entry_point: "main_cs",
+    });
+
+    let readback_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+        label: None,
+        size: src.len() as wgpu::BufferAddress,
+        // Can be read to the CPU, and can be copied from the shader's storage buffer
+        usage: wgpu::BufferUsage::MAP_READ | wgpu::BufferUsage::COPY_DST,
+        mapped_at_creation: false,
+    });
+
+    let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+        label: Some("Collatz Conjecture Input"),
+        contents: &src,
+        usage: wgpu::BufferUsage::STORAGE
+            | wgpu::BufferUsage::COPY_DST
+            | wgpu::BufferUsage::COPY_SRC,
+    });
+
+    let timestamp_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+        label: Some("Timestamps buffer"),
+        size: 16,
+        usage: wgpu::BufferUsage::MAP_READ | wgpu::BufferUsage::COPY_DST,
+        mapped_at_creation: true,
+    });
+    timestamp_buffer.unmap();
+
+    let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
+        label: None,
+        layout: &bind_group_layout,
+        entries: &[wgpu::BindGroupEntry {
+            binding: 0,
+            resource: storage_buffer.as_entire_binding(),
+        }],
+    });
+
+    let queries = device.create_query_set(&wgpu::QuerySetDescriptor {
+        count: 2,
+        ty: wgpu::QueryType::Timestamp,
+    });
+
+    // `main_cs` runs 64 invocations per workgroup. Round up so the last, partially filled
+    // workgroup is dispatched too; the shader skips indices past the end of the buffer.
+    let workgroup_count = (src_range.len() as u32 + 63) / 64;
+
+    let mut encoder =
+        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+
+    {
+        let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
+        cpass.set_bind_group(0, &bind_group, &[]);
+        cpass.set_pipeline(&compute_pipeline);
+        cpass.write_timestamp(&queries, 0);
+        cpass.dispatch(workgroup_count, 1, 1);
+        cpass.write_timestamp(&queries, 1);
+    }
+
+    encoder.copy_buffer_to_buffer(
+        &storage_buffer,
+        0,
+        &readback_buffer,
+        0,
+        src.len() as wgpu::BufferAddress,
+    );
+    encoder.resolve_query_set(&queries, 0..2, &timestamp_buffer, 0);
+
+    queue.submit(Some(encoder.finish()));
+    let buffer_slice = readback_buffer.slice(..);
+    let timestamp_slice = timestamp_buffer.slice(..);
+    let timestamp_future = timestamp_slice.map_async(wgpu::MapMode::Read);
+    let buffer_future = buffer_slice.map_async(wgpu::MapMode::Read);
+    device.poll(wgpu::Maintain::Wait);
+
+    let (buf_res, ts_res) = join(buffer_future, timestamp_future).await;
+    buf_res.unwrap();
+    ts_res.unwrap();
+    let data = buffer_slice.get_mapped_range();
+    let timing_data = timestamp_slice.get_mapped_range();
+    let result = data
+        .chunks_exact(4)
+        .map(|b| u32::from_ne_bytes(b.try_into().unwrap()))
+        .collect::<Vec<_>>();
+    let timings = timing_data
+        .chunks_exact(8)
+        .map(|b| u64::from_ne_bytes(b.try_into().unwrap()))
+        .collect::<Vec<_>>();
+    drop(data);
+    readback_buffer.unmap();
+    drop(timing_data);
+    timestamp_buffer.unmap();
+    let duration = Duration::from_nanos(
+        ((timings[1] - timings[0]) as f64 * f64::from(timestamp_period)) as u64
+    );
+    (result, duration)
+}
+
+/// Builds the shader, runs it on the GPU, and prints every result followed by the GPU time.
+fn main() {
+    let (result, duration) = futures::executor::block_on(run_shader(get_shader()));
+    for out in result {
+        println!("{}", out);
+    }
+    println!("Took: {:?}", duration);
+    //assert!(false, "Took: {:?}", duration);
+}
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn smoke() {
+        super::main();
+    }
+}
diff --git a/src/gpusim_impl/Cargo.toml b/src/gpusim_impl/Cargo.toml
new file mode 100644
index 0000000..d060ad9
--- /dev/null
+++ b/src/gpusim_impl/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "gpusim_impl"
+version = "0.1.0"
+edition = "2018"
+
+[dependencies]
+spirv-std = { git = "https://github.com/EmbarkStudios/rust-gpu", features = ["glam"] }
+
+[lib]
+crate-type = ["dylib", "lib"]
diff --git a/src/gpusim_impl/src/lib.rs b/src/gpusim_impl/src/lib.rs
new file mode 100644
index 0000000..9953b30
--- /dev/null
+++ b/src/gpusim_impl/src/lib.rs
@@ -0,0 +1,65 @@
+#![cfg_attr(
+    target_arch = "spirv",
+    feature(register_attr),
+    register_attr(spirv),
+    no_std
+)]
+// HACK(eddyb) can't easily see warnings otherwise from `spirv-builder` builds.
+#![deny(warnings)]
+
+extern crate spirv_std;
+
+use glam::UVec3;
+use spirv_std::glam;
+#[cfg(not(target_arch = "spirv"))]
+use spirv_std::macros::spirv;
+
+// Adapted from the wgpu hello-compute example
+
+/// Returns the number of Collatz steps needed to reach 1 from `n`.
+///
+/// Returns `None` if `n` is 0 or if a `3 * n + 1` step would overflow `u32`.
+pub fn collatz(mut n: u32) -> Option<u32> {
+    let mut i = 0;
+    if n == 0 {
+        return None;
+    }
+    while n != 1 {
+        n = if n % 2 == 0 {
+            n / 2
+        } else {
+            // Overflow? (i.e. 3*n + 1 > 0xffff_ffff)
+            if n >= 0x5555_5555 {
+                return None;
+            }
+            // TODO: Use this instead when/if checked add/mul can work: n.checked_mul(3)?.checked_add(1)?
+            3 * n + 1
+        };
+        i += 1;
+    }
+    Some(i)
+}
+
+/// Compute entry point: replaces each element of the bound buffer with its Collatz step count,
+/// or with `u32::MAX` when `collatz` returns `None`.
+// LocalSize/numthreads of (x = 64, y = 1, z = 1)
+#[spirv(compute(threads(64)))]
+pub fn main_cs(
+    #[spirv(global_invocation_id)] id: UVec3,
+    #[spirv(storage_buffer, descriptor_set = 0, binding = 0)] prime_indices: &mut [u32],
+) {
+    let index = id.x as usize;
+    // The host rounds the workgroup count up, so trailing invocations may have no element.
+    if index < prime_indices.len() {
+        prime_indices[index] = unwrap_or_max(collatz(prime_indices[index]));
+    }
+}
+
+/// Returns the contained value, or `u32::MAX` for `None`.
+// Work around https://github.com/EmbarkStudios/rust-gpu/issues/677
+fn unwrap_or_max(option: Option<u32>) -> u32 {
+    match option {
+        Some(inner) => inner,
+        None => u32::MAX,
+    }
+}
diff --git a/src/gpusim_impl/src/main.rs b/src/gpusim_impl/src/main.rs
new file mode 100644
index 0000000..e7a11a9
--- /dev/null
+++ b/src/gpusim_impl/src/main.rs
@@ -0,0 +1,3 @@
+fn main() {
+    println!("Hello, world!");
+}
diff --git a/src/lib.rs b/src/lib.rs
index e9b8d96..1197c17 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,6 +17,8 @@ pub mod render;
 pub mod sim;
 pub mod stim;
 
+mod gpusim;
+
 pub use driver::*;
 pub use mat::*;
 pub use sim::*;