stim: strongly-type the return type of AbstractSim::at with a Fields struct

this will help me not mix up the E and H fields.
note a few suspect areas of code
2022-07-30 17:17:17 -07:00 · 2022-07-30 17:02:10 -07:00 · 2022-07-29 23:54:02 -07:00 · 2022-07-29 23:53:44 -07:00 · 2022-07-29 21:53:49 -07:00 · 2022-07-29 21:45:25 -07:00
63 changed files with 2684 additions and 3101 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 out/
 target/
+*.swp
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -160,6 +160,7 @@ dependencies = [
 name = "buffer_proto5"
 version = "0.1.0"
 dependencies = [
+ "bincode",
 "coremem",
 "log",
 "serde",
@@ -311,13 +312,12 @@ version = "0.1.0"
 dependencies = [
 "bincode",
 "common_macros",
- "coremem_types",
+ "coremem_cross",
 "criterion",
 "crossterm",
 "csv",
 "dashmap",
 "dyn-clone",
- "enum_dispatch",
 "env_logger",
 "float_eq",
 "font8x8",
@@ -344,6 +344,13 @@ dependencies = [
 "y4m",
 ]

+[[package]]
+name = "coremem_cross"
+version = "0.2.0"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "coremem_post"
 version = "0.1.0"
@@ -358,13 +365,6 @@ dependencies = [
 "structopt",
 ]

-[[package]]
-name = "coremem_types"
-version = "0.1.0"
-dependencies = [
- "serde",
-]
-
 [[package]]
 name = "crc32fast"
 version = "1.3.2"
@@ -577,18 +577,6 @@ version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be"

-[[package]]
-name = "enum_dispatch"
-version = "0.3.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0eb359f1476bf611266ac1f5355bc14aeca37b299d0ebccc038ee7058891c9cb"
-dependencies = [
- "once_cell",
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "env_logger"
 version = "0.9.0"
@@ -2095,7 +2083,7 @@ source = "git+https://github.com/EmbarkStudios/rust-gpu#0866cf591a7fdbbd15bdb346
 name = "spirv_backend"
 version = "0.1.0"
 dependencies = [
- "coremem_types",
+ "coremem_cross",
 "spirv-std",
 ]

--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,7 +5,7 @@ members = [
    "crates/spirv_backend",
    "crates/spirv_backend_builder",
    "crates/spirv_backend_runner",
-    "crates/types",
+    "crates/cross",
    "crates/post",

    "crates/applications/buffer_proto5",
--- a/crates/applications/buffer_proto5/Cargo.toml
+++ b/crates/applications/buffer_proto5/Cargo.toml
@@ -5,6 +5,7 @@ authors = ["Colin <colin@uninsane.org>"]
 edition = "2021"

 [dependencies]
+bincode = "1.3"  # MIT
 coremem = { path = "../../coremem" }
 log = "0.4"
 serde = "1.0"
--- a/crates/applications/buffer_proto5/src/cache.rs
+++ b/crates/applications/buffer_proto5/src/cache.rs
@@ -9,6 +9,7 @@ pub struct DiskCache<K, V, S=NoSupplier> {
 }

 impl<K: DeserializeOwned, V: DeserializeOwned> DiskCache<K, V, NoSupplier> {
+    #[allow(dead_code)]
    pub fn new(path: &str) -> Self {
        Self::new_with_supplier(path, NoSupplier)
    }
@@ -48,6 +49,7 @@ impl<K: Serialize, V: Serialize, S> DiskCache<K, V, S> {
 }

 impl<K: PartialEq + Serialize, V: Serialize + Clone, S> DiskCache<K, V, S> {
+    #[allow(dead_code)]
    pub fn get_or_insert_with<F: FnOnce() -> V>(&mut self, k: K, f: F) -> V {
        if let Some(v) = self.get(&k) {
            return v.clone();
--- a/crates/applications/buffer_proto5/src/main.rs
+++ b/crates/applications/buffer_proto5/src/main.rs
@@ -2,22 +2,27 @@
 //! to couple them. i parameterize the entire setup over a bunch of different factors in order to
 //! search for the conditions which maximize energy transfer from the one core to the other.

-use coremem::{Driver, mat, meas, SpirvDriver};
+use coremem::{Driver, mat, meas};
 use coremem::geom::{region, Cube, Dilate, Memoize, Meters, Spiral, SwapYZ, Torus, Translate, Wrap};
 use coremem::mat::{Ferroxcube3R1MH, IsoConductorOr};
 use coremem::real::{R32, Real as _};
 use coremem::render::CsvRenderer;
-use coremem::stim::{CurlStimulus, Exp1, Gated, Sinusoid1, TimeVarying as _};
+use coremem::sim::spirv::{SpirvSim, WgpuBackend};
 use coremem::sim::units::{Seconds, Time as _};
-use coremem::util::cache::DiskCache;
+use coremem::stim::{CurlStimulus, Exp1, Gated, Sinusoid1, TimeVarying as _};
 use log::{error, info, warn};
 use serde::{Deserialize, Serialize};

+mod cache;
+
+use cache::DiskCache;
+
 type Mat = IsoConductorOr<f32, Ferroxcube3R1MH>;

 #[allow(unused)]
 use coremem::geom::{Coord as _, Region as _};

+#[allow(unused)]
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 enum PulseType {
    Square,
@@ -26,6 +31,7 @@ enum PulseType {
    ExpDecay2x,
 }

+#[allow(unused)]
 /// Return just the extrema of some collection
 fn extrema(mut meas: Vec<f32>) -> Vec<f32> {
    let mut i = 0;
@@ -389,7 +395,9 @@ fn run_sim(id: u32, p: Params, g: Geometries) -> Results {
        p.clock_type,
    );

-    let mut driver: SpirvDriver<Mat> = Driver::new_spirv(g.dim, p.geom.feat_size);
+    let mut driver = Driver::new(SpirvSim::<f32, Mat, WgpuBackend>::new(
+        g.dim.to_index(p.geom.feat_size), p.geom.feat_size
+    ));
    driver.set_steps_per_stim(1000);
    if !driver.add_state_file(&*format!("{}/state.bc", prefix), 16000) {
        // mu_r=881.33, starting at H=25 to H=75.
@@ -414,7 +422,7 @@ fn run_sim(id: u32, p: Params, g: Geometries) -> Results {
        info!("loaded state file: skipping geometry calculations");
    }

-    let add_drive_sine_pulse = |driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, duration: f32, amp: f32| {
+    let add_drive_sine_pulse = |driver: &mut Driver<_>, region: &Torus, start: f32, duration: f32, amp: f32| {
        let wave = Sinusoid1::from_wavelength(amp, duration * 2.0)
            .half_cycle()
            .shifted(start);
@@ -426,7 +434,7 @@ fn run_sim(id: u32, p: Params, g: Geometries) -> Results {
        ));
    };

-    let add_drive_square_pulse = |driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, duration: f32, amp: f32| {
+    let add_drive_square_pulse = |driver: &mut Driver<_>, region: &Torus, start: f32, duration: f32, amp: f32| {
        let wave = Gated::new(amp, start, start+duration);
        driver.add_stimulus(CurlStimulus::new(
            region.clone(),
@@ -436,7 +444,7 @@ fn run_sim(id: u32, p: Params, g: Geometries) -> Results {
        ));
    };

-    let add_drive_exp_pulse = |driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, duration: f32, amp: f32| {
+    let add_drive_exp_pulse = |driver: &mut Driver<_>, region: &Torus, start: f32, duration: f32, amp: f32| {
        let wave = Exp1::new_at(amp, start, 0.5*duration);
        driver.add_stimulus(CurlStimulus::new(
            region.clone(),
@@ -447,11 +455,11 @@ fn run_sim(id: u32, p: Params, g: Geometries) -> Results {
    };

    // step function: "permanently" increase the current by `amp`.
-    let _add_drive_step = |driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, amp: f32| {
+    let _add_drive_step = |driver: &mut Driver<_>, region: &Torus, start: f32, amp: f32| {
        add_drive_square_pulse(driver, region, start, 1.0 /* effectively infinite duration */, amp);
    };

-    let add_drive_pulse = |ty: PulseType, driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, duration: f32, amp: f32| {
+    let add_drive_pulse = |ty: PulseType, driver: &mut Driver<_>, region: &Torus, start: f32, duration: f32, amp: f32| {
        match ty {
            PulseType::Square => add_drive_square_pulse(driver, region, start, duration, amp),
            PulseType::Sine => add_drive_sine_pulse(driver, region, start, duration, amp),
--- a/crates/applications/multi_core_inverter/src/main.rs
+++ b/crates/applications/multi_core_inverter/src/main.rs
@@ -36,20 +36,31 @@
 //! $ pushd crates/coremem; cargo run --release --bin viewer ../../out/applications/multi_core_inverter/0/ ; popd
 //! ```

-use coremem::geom::{Meters, Torus};
-use coremem::sim::units::Seconds;
+use coremem::geom::{Coord as _, Meters, Torus};
 use coremem::mat::{Ferroxcube3R1MH, IsoConductorOr, IsomorphicConductor};
-use coremem::{Driver, SpirvDriver};
+use coremem::meas;
+use coremem::real::{self, Real as _};
+use coremem::sim::spirv::{self, SpirvSim};
+use coremem::sim::units::Seconds;
+use coremem::stim::{CurlStimulus, Gated, Sinusoid1, TimeVarying as _};
+use coremem::Driver;

-type Mat = IsoConductorOr<f32, Ferroxcube3R1MH>;
+type R = real::R32;
+type Mat = IsoConductorOr<R, Ferroxcube3R1MH>;
+// type Backend = spirv::CpuBackend;
+type Backend = spirv::WgpuBackend;

 fn main() {
    coremem::init_logging();
    coremem::init_debug();
    let um = |n| n as f32 * 1e-6;
-    let ns = |n| n as f32 * 1e-9;
+    // let ns = |n| n as f32 * 1e-9;
+    let ps = |n| n as f32 * 1e-12;
    let feat_size = um(10);

+    let input_magnitude = 1.0e9;
+    let clock_phase_duration = ps(1000);
+
    let s_major = um(160);
    let s_minor = um(30);
    let io_major = um(80);
@@ -60,40 +71,106 @@ fn main() {
    let sx = |n| um((n+1) * 400);
    let sy = um(400);
    let sz = um(280);
-    let couplingx = |n| sx(n) - um(200);
+    let couplingx = |n| sx(n) + um(200);

    let sim_bounds = Meters::new(sx(4), sy * 2.0, sz * 2.0);
    let sim_padding = Meters::new(um(80), um(80), um(80));
-    let duration = Seconds(ns(1));

    let drive0 = Torus::new_xz(Meters::new(sx(0) - s_major, sy, sz), io_major, io_minor);
    let sense3 = Torus::new_xz(Meters::new(sx(3) + s_major, sy, sz), io_major, io_minor);
    let ctl = |n| Torus::new_yz(Meters::new(sx(n), sy + s_major, sz), io_major, io_minor);
    let s = |n| Torus::new_xy(Meters::new(sx(n), sy, sz), s_major, s_minor);
+    // coupling(n) is the wire which couples core n into core n+1
    let coupling = |n| Torus::new_xz(Meters::new(couplingx(n), sy, sz), coupling_major, coupling_minor);

-    let wire_mat = IsomorphicConductor::new(1e6);
-    let ferro_mat = wire_mat;
-    // let ferro_mat = Ferroxcube3R1MH::new(); // uncomment when ready to simulate for real
+    let input = |region: &Torus, cycle: u32, direction: i32| {
+        let area = region.cross_section();
+        let amp = direction as f32 * input_magnitude / area;
+        let start = clock_phase_duration * cycle as f32;
+        let wave = Gated::new(amp, start, start + clock_phase_duration);
+        // let wave = Sinusoid1::from_wavelength(direction as f32 * input_magnitude / area, clock_phase_duration * 2.0)
+        //     .half_cycle()
+        //     .shifted(clock_phase_duration * cycle as f32);
+        CurlStimulus::new(
+            region.clone(),
+            wave.clone(),
+            region.center(),
+            region.axis(),
+        )
+    };

-    let mut driver: SpirvDriver<Mat> = Driver::new_spirv(sim_bounds, feat_size);
-    driver.add_classical_boundary(sim_padding);
+    let wire_mat = IsomorphicConductor::new(1e6f32.cast::<R>());
+    // let ferro_mat = wire_mat;
+    let ferro_mat = Ferroxcube3R1MH::new();
+
+    let mut driver = Driver::new(SpirvSim::<R, Mat, Backend>::new(
+        sim_bounds.to_index(feat_size), feat_size,
+    ));
+    driver.add_classical_boundary_explicit::<R, _>(sim_padding);
+
+    //////// create the wires and toroids
    driver.fill_region(&drive0, wire_mat);
    driver.fill_region(&sense3, wire_mat);
    for core in 0..4 {
        driver.fill_region(&s(core), ferro_mat);
        driver.fill_region(&ctl(core), wire_mat);
-        if core != 0 {
+        if core != 3 {
            driver.fill_region(&coupling(core), wire_mat);
        }
    }

-    let prefix = "out/applications/multi_core_inverter/0/";
+    //////// monitor some measurements
+    // driver.add_measurement(meas::CurrentLoop::new("drv0", drive0.clone()));
+    for core in 0..4 {
+        driver.add_measurement(meas::CurrentLoop::new(
+            &format!("drive{}", core),
+            ctl(core),
+        ));
+    }
+    for core in 0..3 {
+        driver.add_measurement(meas::CurrentLoop::new(
+            &format!("sense{}", core),
+            coupling(core),
+        ));
+    }
+    for core in 0..4 {
+        driver.add_measurement(meas::MagneticLoop::new(
+            &format!("state{}", core),
+            s(core),
+        ));
+    }
+    driver.add_measurement(meas::CurrentLoop::new("sense3", sense3.clone()));
+
+    //////// create the stimuli
+    // CTL{n} effectively leads CTL{n-1}
+    // or: at time t, CTL{n} is at cycle[t+n]
+    // where cycle[t] is defined by CTL[0](t):
+    //   0, +Vdd, +Vdd, +Vdd
+    // TODO: this is wrong (as is the diagram in the blog)! CTL0, being an inverter,
+    //   needs -Vdd to recharge to +polarization
+    let cycles = 1;
+    let duration = Seconds(clock_phase_duration * (cycles + 2) as f32);
+    for cycle in 0..cycles {
+        for core in 0..1 { // TODO: core 0
+            let dir = 1;
+            //let dir = if (cycle+core) % 4 == 0 {
+            //    0
+            //} else {
+            //    1
+            //};
+            if dir != 0 {
+                // micro opt/safety: don't place zero-magnitude stimuli
+                driver.add_stimulus(input(&ctl(core), cycle, dir));
+            }
+        }
+    }
+
+    let prefix = "out/applications/multi_core_inverter/2/";
    let _ = std::fs::create_dir_all(&prefix);
    // driver.add_state_file(&*format!("{}state.bc", prefix), 9600);
-    driver.add_serializer_renderer(&*format!("{}frame-", prefix), 36000, None);
-    driver.add_csv_renderer(&*format!("{}meas.csv", prefix), 400, None);
-    driver.set_steps_per_stim(200);
+    driver.add_serializer_renderer(&*format!("{}frame-", prefix), 400, None);
+    driver.add_csv_renderer(&*format!("{}meas.csv", prefix), 100, None);
+    driver.set_steps_per_stim(100);

    driver.step_until(duration);
 }
--- a/crates/applications/sr_latch/src/main.rs
+++ b/crates/applications/sr_latch/src/main.rs
@@ -3,8 +3,9 @@
 /// the SR latch in this example is wired to a downstream latch, mostly to show that it's
 /// possible to transfer the state (with some limitation) from one latch to another.

-use coremem::{Driver, mat, meas, SpirvDriver};
-use coremem::geom::{Meters, Torus};
+use coremem::{Driver, mat, meas};
+use coremem::geom::{Coord as _, Meters, Torus};
+use coremem::sim::spirv::{SpirvSim, WgpuBackend};
 use coremem::sim::units::Seconds;
 use coremem::stim::{CurlStimulus, Sinusoid1, TimeVarying as _};

@@ -58,7 +59,9 @@ fn main() {
    let coupling_region = Torus::new_xz(Meters::new(0.5*(ferro1_center + ferro2_center), ferro_center_y, half_depth), wire_coupling_major, wire_minor);
    let sense_region = Torus::new_xz(Meters::new(ferro2_center + ferro_major, ferro_center_y, half_depth), wire_major, wire_minor);

-    let mut driver: SpirvDriver<mat::FullyGenericMaterial<f32>> = Driver::new_spirv(Meters::new(width, height, depth), feat_size);
+    let mut driver = Driver::new(SpirvSim::<f32, mat::FullyGenericMaterial<f32>, WgpuBackend>::new(
+        Meters::new(width, height, depth).to_index(feat_size), feat_size
+    ));

    // mu_r=881.33, starting at H=25 to H=75.
    driver.fill_region(&ferro1_region, mat::MHPgram::new(25.0, 881.33, 44000.0));
--- a/crates/applications/wavefront/src/main.rs
+++ b/crates/applications/wavefront/src/main.rs
@@ -7,11 +7,14 @@
 //! with something that absorbs energy. since this example doesn't, it lets you see what
 //! happens when you just use the default boundary conditions.

-use coremem::{mat, driver};
+use coremem::{mat, Driver};
 use coremem::geom::{Coord as _, Cube, Index};
 use coremem::units::Seconds;
+use coremem::sim::spirv::{self, SpirvSim};
 use coremem::stim::{Stimulus, TimeVarying as _, UniformStimulus};
-use coremem::types::vec::Vec3;
+use coremem::cross::vec::Vec3;
+
+type Mat = mat::FullyGenericMaterial<f32>;

 fn main() {
    coremem::init_logging();
@@ -23,13 +26,12 @@ fn main() {
    let feature_size = 1e-6;

    // create the simulation "driver".
-    // by default all the computations are done with R32: a f32 which panics on NaN/Inf.
-    // you can parameterize it to use R64, or unchecked f32 -- see src/driver.rs for definition.
-    let mut driver: driver::SpirvDriver = driver::Driver::new_spirv(size, feature_size);
-
-    // uncomment to use the Spirv/GPU driver. this one is restricted to unchecked f32.
-    // note: this won't have better perf unless you reduce the y4m/term renderer framerate below.
-    // let mut driver: driver::SpirvDriver = driver::Driver::new_spirv(size, feature_size);
+    // the first parameter is the float type to use: f32 for unchecked math, coremem::real::R32
+    // to guard against NaN/Inf (useful for debugging).
+    // to run this on the gpu instead of the gpu, replace `CpuBackend` with `WgpuBackend`.
+    let mut driver = Driver::new(SpirvSim::<f32, Mat, spirv::CpuBackend>::new(
+        size, feature_size
+    ));

    // create a conductor on the left side.
    let conductor = Cube::new(
--- a/crates/coremem/Cargo.toml
+++ b/crates/coremem/Cargo.toml
@@ -16,7 +16,6 @@ crossterm = "0.24"  # MIT
 csv = "1.1"  # MIT or Unlicense
 dashmap = "5.3"  # MIT
 dyn-clone = "1.0"  # MIT or Apache 2.0
-enum_dispatch = "0.3"  # MIT or Apache 2.0
 env_logger = "0.9"  # MIT or Apache 2.0
 float_eq = "1.0"  # MIT or Apache 2.0
 font8x8 = "0.3"  # MIT
@@ -47,7 +46,7 @@ spirv-std = { git = "https://github.com/EmbarkStudios/rust-gpu" }
 spirv-std-macros = { git = "https://github.com/EmbarkStudios/rust-gpu" }
 spirv_backend = { path = "../spirv_backend" }
 spirv_backend_runner = { path = "../spirv_backend_runner" }
-coremem_types = { path = "../types", features = ["fmt", "serde"] }
+coremem_cross = { path = "../cross", features = ["fmt", "serde"] }


 [dev-dependencies]
--- a/crates/coremem/benches/driver.rs
+++ b/crates/coremem/benches/driver.rs
@@ -1,27 +1,16 @@
-use coremem::{Driver, SimState, SpirvDriver};
+use coremem::Driver;
 use coremem::geom::Index;
-use coremem::mat::{Ferroxcube3R1MH, IsoConductorOr, GenericMaterial, GenericMaterialNoPml, GenericMaterialOneField};
-use coremem::real::R32;
-use coremem::sim::spirv::{self, SpirvSim};
+use coremem::mat::{Ferroxcube3R1MH, IsoConductorOr, FullyGenericMaterial};
+use coremem::sim::spirv::{SpirvSim, WgpuBackend};
 use criterion::{BenchmarkId, criterion_group, criterion_main, Criterion};

-type DefaultDriver = Driver::<SimState<R32, GenericMaterial<R32>>>;
-
-pub fn bench_step(c: &mut Criterion) {
-    for size in &[10, 20, 40, 80, 160] {
-        let sim = SimState::<R32, GenericMaterial<R32>>::new(Index::new(*size, *size, *size), 1e-5);
-        c.bench_with_input(BenchmarkId::new("Driver::step", size), &sim, |b, sim| {
-            let mut driver = Driver::new_with_state(sim.clone());
-            b.iter(|| driver.step())
-        });
-    }
-}

 pub fn bench_step_spirv(c: &mut Criterion) {
+    type Mat = FullyGenericMaterial<f32>;
    for size in &[10, 20, 40, 80, 160] {
-        let sim: SpirvSim = SpirvSim::new(Index::new(*size, *size, *size), 1e-5);
+        let sim = SpirvSim::<f32, Mat, WgpuBackend>::new(Index::new(*size, *size, *size), 1e-5);
        c.bench_with_input(BenchmarkId::new("Driver::step_spirv", size), &sim, |b, sim| {
-            let mut driver = Driver::new_with_state(sim.clone());
+            let mut driver = Driver::new(sim.clone());
            b.iter(|| driver.step())
        });
    }
@@ -30,42 +19,13 @@ pub fn bench_step_spirv(c: &mut Criterion) {
 pub fn bench_step_spirv_iso_3r1(c: &mut Criterion) {
    type Mat = IsoConductorOr<f32, Ferroxcube3R1MH>;
    for size in &[10, 20, 40, 80, 160] {
-        let sim: SpirvSim<Mat> = SpirvSim::new(Index::new(*size, *size, *size), 1e-5);
+        let sim = SpirvSim::<f32, Mat, WgpuBackend>::new(Index::new(*size, *size, *size), 1e-5);
        c.bench_with_input(BenchmarkId::new("Driver::spirv_ISO3R1", size), &sim, |b, sim| {
-            let mut driver: SpirvDriver<Mat> = Driver::new_with_state(sim.clone());
+            let mut driver = Driver::new(sim.clone());
            b.iter(|| driver.step())
        });
    }
 }

-// pub fn bench_step_no_pml(c: &mut Criterion) {
-//     for size in &[10, 20, 40, 80, 160] {
-//         c.bench_with_input(BenchmarkId::new("Driver::step_no_pml", size), size, |b, &size| {
-//             let mut driver = DefaultDriver::new(Index::new(size, size, size), 1e-5);
-//             b.iter(|| driver.step())
-//         });
-//     }
-// }
-// 
-// pub fn bench_step_one_vec(c: &mut Criterion) {
-//     for size in &[10, 20, 40, 80, 160] {
-//         c.bench_with_input(BenchmarkId::new("Driver::step_one_vec", size), size, |b, &size| {
-//             let mut driver = DefaultDriver::new(Index::new(size, size, size), 1e-5);
-//             b.iter(|| driver.step())
-//         });
-//     }
-// }
-// 
-// pub fn bench_step_with_pml(c: &mut Criterion) {
-//     let size = 40;
-//     for thickness in &[0, 1, 2, 4, 8, 16] {
-//         c.bench_with_input(BenchmarkId::new("Driver::step_with_pml", thickness), thickness, |b, &thickness| {
-//             let mut driver = DefaultDriver::new(Index::new(size, size, size), 1e-5);
-//             driver.add_pml_boundary(Index::new(thickness, thickness, thickness));
-//             b.iter(|| driver.step())
-//         });
-//     }
-// }
-
-criterion_group!(benches, /*bench_step,*/ bench_step_spirv, bench_step_spirv_iso_3r1);
+criterion_group!(benches, bench_step_spirv, bench_step_spirv_iso_3r1);
 criterion_main!(benches);
--- a/crates/coremem/benches/history.txt
+++ b/crates/coremem/benches/history.txt
@@ -103,3 +103,15 @@ Driver::spirv_ISO3R1/20 time:   [548.70 us 555.85 us 563.28 us]
 Driver::spirv_ISO3R1/40 time:   [1.5333 ms 1.5405 ms 1.5489 ms]
 Driver::spirv_ISO3R1/80 time:   [13.299 ms 13.335 ms 13.376 ms]
 Driver::spirv_ISO3R1/160time:   [164.57 ms 164.74 ms 164.93 ms]
+
+5a0766451d96835061a674ab94f00341adb2b187:
+Driver::step_spirv/10   time:   [590.26 us 600.42 us 613.28 us]
+Driver::step_spirv/20   time:   [870.49 us 884.81 us 902.21 us]
+Driver::step_spirv/40   time:   [3.4094 ms 3.4285 ms 3.4498 ms]
+Driver::step_spirv/80   time:   [35.488 ms 35.673 ms 35.922 ms]
+Driver::step_spirv/160  time:   [270.98 ms 271.19 ms 271.43 ms]
+Driver::spirv_ISO3R1/10 time:   [585.57 us 596.11 us 608.79 us]
+Driver::spirv_ISO3R1/20 time:   [826.63 us 841.79 us 860.86 us]
+Driver::spirv_ISO3R1/40 time:   [2.8808 ms 2.9004 ms 2.9237 ms]
+Driver::spirv_ISO3R1/80 time:   [28.955 ms 29.027 ms 29.115 ms]
+Driver::spirv_ISO3R1/160time:   [216.03 ms 216.22 ms 216.45 ms]
--- a/crates/coremem/src/bin/bench.rs
+++ b/crates/coremem/src/bin/bench.rs
@@ -1,6 +1,7 @@
-use coremem::{self, Driver, GenericSim};
+use coremem::{self, Driver, AbstractSim};
 use coremem::sim::legacy::SimState;
-use coremem::sim::spirv::SpirvSim;
+use coremem::sim::spirv::{SpirvSim, WgpuBackend};
+use coremem::cross::mat::FullyGenericMaterial;
 use coremem::geom::Index;
 use std::time::{Instant, Duration};

@@ -17,19 +18,35 @@ fn measure<F: FnMut()>(name: &str, n_times: u32, mut f: F) -> f32 {
    avg
 }

-fn measure_steps<S: GenericSim + Clone + Default + Send + Sync + 'static>(name: &str, steps_per_call: u32, mut d: Driver<S>) {
+fn measure_steps<S: AbstractSim + Clone + Default + Send + 'static>(name: &str, steps_per_call: u32, mut d: Driver<S>) {
    measure(name, 100/steps_per_call, || d.step_multiple(steps_per_call));
 }

 fn main() {
    coremem::init_logging();
-    measure_steps("spirv/80", 1, Driver::<SpirvSim>::new_spirv(Index::new(80, 80, 80), 1e-3));
-    measure_steps("sim/80", 1, Driver::<SimState>::new(Index::new(80, 80, 80), 1e-3));
-    measure_steps("spirv/80 step(2)", 2, Driver::<SpirvSim>::new_spirv(Index::new(80, 80, 80), 1e-3));
-    measure_steps("sim/80 step(2)", 2, Driver::<SimState>::new(Index::new(80, 80, 80), 1e-3));
-    measure_steps("spirv/80 step(10)", 10, Driver::<SpirvSim>::new_spirv(Index::new(80, 80, 80), 1e-3));
-    measure_steps("sim/80 step(10)", 10, Driver::<SimState>::new(Index::new(80, 80, 80), 1e-3));
-    measure_steps("spirv/80 step(100)", 100, Driver::<SpirvSim>::new_spirv(Index::new(80, 80, 80), 1e-3));
-    measure_steps("sim/80 step(100)", 100, Driver::<SimState>::new(Index::new(80, 80, 80), 1e-3));
+    measure_steps("spirv/80", 1, Driver::new(
+        SpirvSim::<f32, FullyGenericMaterial<f32>, WgpuBackend>::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("sim/80", 1, Driver::<SimState>::new(
+        SimState::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("spirv/80 step(2)", 2, Driver::new(
+        SpirvSim::<f32, FullyGenericMaterial<f32>, WgpuBackend>::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("sim/80 step(2)", 2, Driver::<SimState>::new(
+        SimState::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("spirv/80 step(10)", 10, Driver::new(
+        SpirvSim::<f32, FullyGenericMaterial<f32>, WgpuBackend>::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("sim/80 step(10)", 10, Driver::<SimState>::new(
+        SimState::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("spirv/80 step(100)", 100, Driver::new(
+        SpirvSim::<f32, FullyGenericMaterial<f32>, WgpuBackend>::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("sim/80 step(100)", 100, Driver::<SimState>::new(
+        SimState::new(Index::new(80, 80, 80), 1e-3)
+    ));
 }

--- a/crates/coremem/src/bin/pml_tuning.rs
+++ b/crates/coremem/src/bin/pml_tuning.rs
@@ -1,498 +0,0 @@
-use coremem::*;
-use coremem::geom::*;
-use coremem::real::{Real as _, ToFloat as _};
-use coremem::sim::legacy::mat::Static;
-use coremem::stim::AbstractStimulus;
-use coremem::types::vec::Vec3;
-use rand::rngs::StdRng;
-use rand::{Rng as _, SeedableRng as _};
-
-fn energy<R: Region>(s: &dyn SampleableSim, reg: &R) -> f32 {
-    let e = f64::half() * s.map_sum_over_enumerated(reg, |_pos: Index, cell| {
-        cell.e().mag_sq().to_f64()
-    });
-    e.cast()
-}
-
-fn energy_now_and_then<R: Region>(state: &mut StaticSim, reg: &R, frames: u32) -> (f32, f32) {
-    let energy_0 = energy(state, reg);
-    for _ in 0..frames {
-        state.step();
-    }
-    let energy_1 = energy(state, reg);
-    (energy_0, energy_1)
-}
-
-struct PmlStim<F> {
-    /// Maps index -> (stim vector, stim frequency)
-    f: F,
-    t_end: f32,
-    feat_size: f32,
-}
-impl<F: Fn(Index) -> (Vec3<f32>, f32) + Sync> AbstractStimulus for PmlStim<F> {
-    fn at(&self, t_sec: f32, pos: Meters) -> (Vec3<f32>, Vec3<f32>) {
-        let angle = t_sec/self.t_end*f32::two_pi();
-        let gate = 0.5*(1.0 - angle.cos());
-        let (e, hz) = (self.f)(pos.to_index(self.feat_size));
-        let sig_angle = angle*hz;
-        let sig = sig_angle.sin();
-        (e*gate*sig, Vec3::zero())
-    }
-}
-
-/// Apply some stimulus, and then let it decay and measure the ratio of energy left in the system
-fn apply_stim_full_interior<F>(state: &mut StaticSim, frames: u32, f: F)
-where F: Fn(Index) -> (Vec3<f32>, f32) + Sync // returns (E vector, omega)
-{
-    let stim = PmlStim {
-        f,
-        t_end: (frames as f32) * state.timestep(),
-        feat_size: state.feature_size(),
-    };
-    for _t in 0..frames {
-        state.apply_stimulus(&stim);
-        state.step();
-    }
-}
-
-fn apply_stim_over_region<R, F>(state: &mut StaticSim, frames: u32, reg: R, f: F)
-where
-    R: Region,
-    F: Fn(Index) -> (Vec3<f32>, f32) + Sync,
-{
-    let feat = state.feature_size();
-    apply_stim_full_interior(state, frames, |idx| {
-        if reg.contains(idx.to_meters(feat)) {
-            f(idx)
-        } else {
-            (Vec3::zero(), 0.0)
-        }
-    });
-}
-
-/// Stimulate each point in the region with a pseudorandom (but predictable) e wave
-fn apply_chaotic_stim_over_region<R: Region>(state: &mut StaticSim, frames: u32, interior: R) {
-    apply_stim_over_region(state, frames, interior, |idx| {
-        let seed = (idx.x() as u64) ^ ((idx.y() as u64) << 16) ^ ((idx.z() as u64) << 32);
-        let mut rng = StdRng::seed_from_u64(seed);
-        let dir = Vec3::new(
-            rng.gen_range(-1.0..1.0),
-            rng.gen_range(-1.0..1.0),
-            rng.gen_range(-1.0..1.0),
-        );
-        // XXX only works if it's a whole number. I suppose this makes sense though, as
-        // other numbers would create higher harmonics when gated.
-        let hz = rng.gen_range(0..=2);
-        (dir, hz as _)
-    })
-}
-
-fn chaotic_pml_test(state: &mut StaticSim, boundary: u32, padding: u32, frames: u32) -> f32 {
-    let feat = state.feature_size();
-    {
-        let upper_left_idx = Index::unit()*padding;
-        let lower_right_idx = state.size() - Index::unit() - upper_left_idx;
-        let interior = Cube::new(upper_left_idx.to_meters(feat), lower_right_idx.to_meters(feat));
-        apply_chaotic_stim_over_region(state, frames, interior);
-    }
-
-    let upper_left_idx = Index::unit()*boundary;
-    let lower_right_idx = state.size() - Index::unit() - upper_left_idx;
-    let sim_region = Cube::new(upper_left_idx.to_meters(feat), lower_right_idx.to_meters(feat));
-
-    let (energy_0, energy_1) = energy_now_and_then(state, &sim_region, frames);
-    // println!("Energy: {}/{}", energy_1, energy_0);
-    energy_1/energy_0
-}
-
-#[allow(unused)]
-fn state_for_pml(size: Index, boundary: Index, feat_size: f32, sc_coeff: f32, cond_coeff: f32, pow: f32) -> StaticSim {
-    let mut state = StaticSim::new(size, feat_size);
-    let timestep = state.timestep();
-    state.fill_boundary_using(boundary, |boundary_ness| {
-        let b = boundary_ness.elem_pow(pow);
-        let coord_stretch = b * sc_coeff / timestep;
-        let conductivity = Vec3::unit() * (b.mag() * cond_coeff / timestep);
-        unimplemented!();
-        Static::default()
-        // Static {
-        //     // TODO PML coord_stretch,
-        //     conductivity,
-        //     pml: Some((PmlState::new(), PmlParameters::new(coord_stretch))),
-        //     ..Default::default()
-        // }
-    });
-    state
-}
-
-fn main() {
-    // Explanation here (slide 63): https://empossible.net/wp-content/uploads/2020/01/Lecture-The-Perfectly-Matched-Layer.pdf
-    // Claims that eps0/delta_t * n^3 is a good way to activate the PML layer
-
-    for pow in vec![3.0] {
-    for sc_coeff in vec![
-        // 0.1*consts::EPS0,
-        // 0.5*consts::EPS0,
-        // 1e0*consts::EPS0,
-        // 1e1*consts::EPS0,
-        // 1e2*consts::EPS0,
-        // 1e3*consts::EPS0,
-        // 1e6*consts::EPS0,
-        // 1e9*consts::EPS0,
-        // 1e12*consts::EPS0,
-        // 1e15*consts::EPS0,
-        // 1e18*consts::EPS0,
-        // 1e21*consts::EPS0,
-        // 0.01,
-        // 0.03,
-        // 0.05,
-        // 0.07,
-        // 0.08,
-        // 0.09,
-        // 0.1,
-        // 0.15,
-        // 0.2,
-        // 0.25,
-        // 0.3,
-        // 0.4,
-        // 0.5,
-        // 0.6,
-        // 0.7,
-        // 0.8,
-        // 0.9,
-        //0.95,
-        // 1.0,
-        // 1.5,
-        // 2.0,
-        // 3.0,
-        // 5.0,
-        // 10.0,
-        // 100.0,
-        // 1000.0,
-        //0.0,
-        0.5,
-    ] {
-    //for cond_coeff in vec![0.0, 1.0, 1e3, 1e6, 1e9] {
-    for cond_coeff in vec![0.0, 0.5*f32::eps0()] {
-        for frames in vec![400, 1200] {
-            for pml_width in vec![1, 2, 4, 8, 16] {
-                for feat_size in vec![1e-6] {
-                    let size = Index::unit()*121;
-                    let sim_inset = 40;
-                    let boundary = Index::unit()*pml_width;
-                    let mut state = state_for_pml(size, boundary, feat_size, sc_coeff, cond_coeff, pow);
-                    let ratio = chaotic_pml_test(&mut state, pml_width, sim_inset, frames);
-                    println!("{},{} (pow={}, f={}, width={}, frames={}): {}", sc_coeff, cond_coeff, pow, feat_size, pml_width, frames, ratio);
-                }
-            }
-        }
-    }
-    }
-    }
-    // Conclusions:
-    // * if coordinate stretching is divided by time_step, then the absorption is independent of
-    //   feature size.
-    // For 240 frames, PML width of 20, size=121, sim_inset = 40, cubic onset:
-    // * coefficients between [0.4, 1.0] are all within 30% of each other
-    // * coefficients between [0.5, 1.0] are all within 20% of each other
-    // * coefficients > 1 show instability (i.e. instable if coord_stretch > 1 / timestep);
-    // * absorption generally increases as the coefficient approaches 1.0 from the left.
-    //   This begins to reverse at least by 0.98
-    // For 2400 frames, instability starts somewhere between 0.7 and 0.8
-    //   0.5 (f=0.000001, width=20, frames=2400): 0.0006807010986857421
-    //   0.6 (f=0.000001, width=20, frames=2400): 0.0005800974138738364
-    //   0.7 (f=0.000001, width=20, frames=2400): 0.0005092274390086559
-    //   0.8 (f=0.000001, width=20, frames=2400): 7326609898580109000000
-    //   These numbers use hz = rng.gen_range(0..=5), with 20 frames of excitation and 20 steps
-    //     between source and boundary
-    //
-    // Reducing hz to 0..=2 achieves much better perf (pow=3.0):
-    //   0.5 (f=0.000001, width=20, frames=2400): 0.0000007135657380376141
-    //   0.6 (f=0.000001, width=20, frames=2400): 0.0000006561934691721446
-    //   0.7 (f=0.000001, width=20, frames=2400): 0.0000006099334150762627
-    //   0.8 (f=0.000001, width=20, frames=2400): 2180868728529433400000
-    // Quadratic (pow=2.0):
-    //   0.4 (pow=2, f=0.000001, width=20, frames=2400): 0.0000006839243836328471
-    //   0.5 (pow=2, f=0.000001, width=20, frames=2400): 0.0000006189051555210391
-    //   0.6 (pow=2, f=0.000001, width=20, frames=2400): 0.9331062414894028
-    //   0.7 (pow=2, f=0.000001, width=20, frames=2400): 222818414359827930000000000000000000000000000000000000000000000000000000000000000000000
-    // Possibly better efficacy, but also less stable
-    // Linear (pow=1.0) is 100% unstable for at least coeff >= 0.5
-    // Subcubic (pow=2.5):
-    //   0.4 (pow=2.5, f=0.000001, width=20, frames=2400): 0.0000007333781495449756
-    //   0.5 (pow=2.5, f=0.000001, width=20, frames=2400): 0.0000006632381361190291
-    //   0.6 (pow=2.5, f=0.000001, width=20, frames=2400): 0.0000006089982278171636
-    //   0.7 (pow=2.5, f=0.000001, width=20, frames=2400): 7853952559.189004
-    // Superlinear (pow=1.5):
-    //   0.4 (pow=1.5, f=0.000001, width=20, frames=2400): 0.0000006502259131444893
-    //   0.5 (pow=1.5, f=0.000001, width=20, frames=2400): 0.0000005956833662247809
-    //   0.6 (pow=1.5, f=0.000001, width=20, frames=2400): 175615904583581400000000000000000000000000000000000000000000000000000000000000000000
-    // pow=4.0:
-    //   0.4 (pow=4, f=0.000001, width=20, frames=2400): 0.0000009105207758718423
-    //   0.5 (pow=4, f=0.000001, width=20, frames=2400): 0.0000008169331995553951
-    //   0.6 (pow=4, f=0.000001, width=20, frames=2400): 0.0000007525813705681166
-    //   0.7 (pow=4, f=0.000001, width=20, frames=2400): 0.0000007019565296008103
-    //   0.8 (pow=4, f=0.000001, width=20, frames=2400): 0.0000006600546205898854
-    //   0.9 (pow=4, f=0.000001, width=20, frames=2400): 0.0000006246047577819345
-    //   1 (pow=4, f=0.000001, width=20, frames=2400): 5437370254206590000000000000000000000000000000000
-    // pow=3.5:
-    //   0.4 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000008485707012112478
-    //   0.5 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000007653501316913873
-    //   0.6 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000007047188963430957
-    //   0.7 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000006561795324449849
-    //   0.8 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000006159621438553542
-    //   0.9 (pow=3.5, f=0.000001, width=20, frames=2400): 18913062838424785000000000000000000
-    // So generally lower powers = more absorbtion (makes sense: higher average coordinate
-    // stretching), but is more sensitive to error. All powers > 1.5 are stable at coeff=0.5.
-    // We don't know that much about reflection from just this data.
-    // Running over a smaller timeframe should give some suggestion about reflection
-    //   0.4 (pow=2, f=0.000001, width=20, frames=120): 0.2870021971493659
-    //   0.5 (pow=2, f=0.000001, width=20, frames=120): 0.2647114916517679 **
-    //   0.6 (pow=2, f=0.000001, width=20, frames=120): 0.24807341265362437
-    //   0.7 (pow=2, f=0.000001, width=20, frames=120): 0.2350081813369852
-    //   0.8 (pow=2, f=0.000001, width=20, frames=120): 0.22438565227661014
-    //   0.9 (pow=2, f=0.000001, width=20, frames=120): 0.21552516261496907
-    //   1.0 (pow=2, f=0.000001, width=20, frames=120): 0.20798702383197107
-    //   1.5 (pow=2, f=0.000001, width=20, frames=120): 0.39444643124947426
-    //   0.4 (pow=3, f=0.000001, width=20, frames=120): 0.3596522778473902
-    //   0.5 (pow=3, f=0.000001, width=20, frames=120): 0.33718409856439474
-    //   0.6 (pow=3, f=0.000001, width=20, frames=120): 0.32025414395335816
-    //   0.7 (pow=3, f=0.000001, width=20, frames=120): 0.3067653188201421 **
-    //   0.8 (pow=3, f=0.000001, width=20, frames=120): 0.2956250635204507
-    //   0.9 (pow=3, f=0.000001, width=20, frames=120): 0.2861895432992242
-    //   1.0 (pow=3, f=0.000001, width=20, frames=120): 0.27804573152917056
-    //   1.5 (pow=3, f=0.000001, width=20, frames=120): 0.24950413937515897
-    //   0.4 (pow=4, f=0.000001, width=20, frames=120): 0.41043160705678194
-    //   0.5 (pow=4, f=0.000001, width=20, frames=120): 0.38844802581052806
-    //   0.6 (pow=4, f=0.000001, width=20, frames=120): 0.3721037027266112
-    //   0.7 (pow=4, f=0.000001, width=20, frames=120): 0.35912262947037576
-    //   0.8 (pow=4, f=0.000001, width=20, frames=120): 0.34837556188585944
-    //   0.9 (pow=4, f=0.000001, width=20, frames=120): 0.33922665697709947 **
-    //   1 (pow=4, f=0.000001, width=20, frames=120): 0.33128127577349487
-    //   1.5 (pow=4, f=0.000001, width=20, frames=120): 0.30260541581006584
-    // Even smaller timeframe:
-    //   0.5 (pow=2, f=0.000001, width=20, frames=5): 1.0109467232603757
-    //   0.5 (pow=2, f=0.000001, width=20, frames=10): 1.0117049984675424
-    //   0.5 (pow=2, f=0.000001, width=20, frames=20): 1.015054295037722
-    //   0.5 (pow=2, f=0.000001, width=20, frames=30): 1.016127315217072
-    //   0.5 (pow=2, f=0.000001, width=20, frames=40): 1.0125450428623481
-    //   0.5 (pow=2, f=0.000001, width=20, frames=60): 0.9520114341681021
-    //   0.5 (pow=2, f=0.000001, width=20, frames=80): 0.7617289087518861
-    //   0.5 (pow=2, f=0.000001, width=20, frames=120): 0.2647114916517679
-    //   0.5 (pow=2, f=0.000001, width=20, frames=160): 0.018698432021826004
-    //   0.5 (pow=2, f=0.000001, width=20, frames=200): 0.001053437888276037
-    //   0.5 (pow=3, f=0.000001, width=20, frames=5): 1.0109467232603757
-    //   0.5 (pow=3, f=0.000001, width=20, frames=10): 1.0117049984674478
-    //   0.5 (pow=3, f=0.000001, width=20, frames=20): 1.0150542142468075
-    //   0.5 (pow=3, f=0.000001, width=20, frames=30): 1.0161755433699602
-    //   0.5 (pow=3, f=0.000001, width=20, frames=40): 1.0140118995944478
-    //   0.5 (pow=3, f=0.000001, width=20, frames=60): 0.9828724159361404
-    //   0.5 (pow=3, f=0.000001, width=20, frames=80): 0.8304056107875255
-    //   0.5 (pow=3, f=0.000001, width=20, frames=120): 0.33718409856439474
-    //   0.5 (pow=3, f=0.000001, width=20, frames=160): 0.033906877503334515
-    //   0.5 (pow=3, f=0.000001, width=20, frames=200): 0.0016075121270576818
-    //   0.5 (pow=4, f=0.000001, width=20, frames=5): 1.0109467232603757
-    //   0.5 (pow=4, f=0.000001, width=20, frames=10): 1.0117049984674424
-    //   0.5 (pow=4, f=0.000001, width=20, frames=20): 1.015054203569572
-    //   0.5 (pow=4, f=0.000001, width=20, frames=30): 1.0161827617273314
-    //   0.5 (pow=4, f=0.000001, width=20, frames=40): 1.014355067000562
-    //   0.5 (pow=4, f=0.000001, width=20, frames=60): 0.9971421502814346
-    //   0.5 (pow=4, f=0.000001, width=20, frames=80): 0.8718457443603458
-    //   0.5 (pow=4, f=0.000001, width=20, frames=120): 0.38844802581052806
-    //   0.5 (pow=4, f=0.000001, width=20, frames=160): 0.04879003869310861
-    //   0.5 (pow=4, f=0.000001, width=20, frames=200): 0.0025039595751290534
-    //   That the numbers are all the same for t < 20 is not encouraging.
-    //     Could be because of the courant number 0.577? No energy has reached the border yet?
-    // Why don't we query just the energy in the sim region -- not the boundary?
-    // After filtering to measure energy only in the sim region (note that some energy outside the
-    // sim region COULD be reflected back into the sim region later):
-    //   0.5 (pow=2, f=0.000001, width=20, frames=5): 1.0109467232603757
-    //   0.5 (pow=2, f=0.000001, width=20, frames=10): 1.0117049984565956
-    //   0.5 (pow=2, f=0.000001, width=20, frames=20): 1.0150051760172862
-    //   0.5 (pow=2, f=0.000001, width=20, frames=30): 1.0103422463150569
-    //   0.5 (pow=2, f=0.000001, width=20, frames=40): 0.9534926741875023
-    //   0.5 (pow=2, f=0.000001, width=20, frames=60): 0.7052236436305864
-    //   0.5 (pow=2, f=0.000001, width=20, frames=80): 0.42366103692252166
-    //   0.5 (pow=2, f=0.000001, width=20, frames=120): 0.047117482623349645
-    //   0.5 (pow=2, f=0.000001, width=20, frames=160): 0.0012788501490854916
-    //   0.5 (pow=2, f=0.000001, width=20, frames=200): 0.00025052555946608536
-    //   0.5 (pow=2, f=0.000001, width=20, frames=300): 0.000026163786700826282
-    //   0.5 (pow=2, f=0.000001, width=20, frames=400): 0.000009482696335385068
-    //   0.5 (pow=2, f=0.000001, width=20, frames=600): 0.0000034339203626681873
-    //   0.5 (pow=2, f=0.000001, width=20, frames=800): 0.0000016501593549063537
-    //   0.5 (pow=2, f=0.000001, width=20, frames=1200): 0.0000007097133927819365
-    //   0.5 (pow=2, f=0.000001, width=20, frames=1600): 0.0000004990355277480898
-    //   0.5 (pow=2, f=0.000001, width=20, frames=2400): 0.00000026541631012862064
-    //   0.5 (pow=3, f=0.000001, width=20, frames=5): 1.0109467232603757
-    //   0.5 (pow=3, f=0.000001, width=20, frames=10): 1.0117049984564943
-    //   0.5 (pow=3, f=0.000001, width=20, frames=20): 1.015005103609574
-    //   0.5 (pow=3, f=0.000001, width=20, frames=30): 1.010347173332254
-    //   0.5 (pow=3, f=0.000001, width=20, frames=40): 0.9534945122673749
-    //   0.5 (pow=3, f=0.000001, width=20, frames=60): 0.7052084303225945
-    //   0.5 (pow=3, f=0.000001, width=20, frames=80): 0.42365590263359737
-    //   0.5 (pow=3, f=0.000001, width=20, frames=120): 0.04711930602404218
-    //   0.5 (pow=3, f=0.000001, width=20, frames=160): 0.0013058690825149086
-    //   0.5 (pow=3, f=0.000001, width=20, frames=200): 0.00027124952640186995
-    //   0.5 (pow=3, f=0.000001, width=20, frames=300): 0.00003363606115185493
-    //   0.5 (pow=3, f=0.000001, width=20, frames=400): 0.000012979216745112146
-    //   0.5 (pow=3, f=0.000001, width=20, frames=600): 0.000004790890253991059
-    //   0.5 (pow=3, f=0.000001, width=20, frames=800): 0.0000018967555372300478
-    //   0.5 (pow=3, f=0.000001, width=20, frames=1200): 0.000000785458387440694
-    //   0.5 (pow=3, f=0.000001, width=20, frames=1600): 0.0000005657588895614973
-    //   0.5 (pow=3, f=0.000001, width=20, frames=2400): 0.0000002931443936827707
-    //   0.5 (pow=4, f=0.000001, width=20, frames=5): 1.0109467232603757
-    //   0.5 (pow=4, f=0.000001, width=20, frames=10): 1.011704998456489
-    //   0.5 (pow=4, f=0.000001, width=20, frames=20): 1.0150050988039427
-    //   0.5 (pow=4, f=0.000001, width=20, frames=30): 1.0103477261980267
-    //   0.5 (pow=4, f=0.000001, width=20, frames=40): 0.9534933477754826
-    //   0.5 (pow=4, f=0.000001, width=20, frames=60): 0.7052093991486525
-    //   0.5 (pow=4, f=0.000001, width=20, frames=80): 0.4236543609610083
-    //   0.5 (pow=4, f=0.000001, width=20, frames=120): 0.047143352151935734
-    //   0.5 (pow=4, f=0.000001, width=20, frames=160): 0.0014253338264054373
-    //   0.5 (pow=4, f=0.000001, width=20, frames=200): 0.0003719236701949888
-    //   0.5 (pow=4, f=0.000001, width=20, frames=300): 0.0000655709065994179
-    //   0.5 (pow=4, f=0.000001, width=20, frames=400): 0.000019428436023699366
-    //   0.5 (pow=4, f=0.000001, width=20, frames=600): 0.000006787101433521501
-    //   0.5 (pow=4, f=0.000001, width=20, frames=800): 0.000002352253311002263
-    //   0.5 (pow=4, f=0.000001, width=20, frames=1200): 0.0000008945009475225972
-    //   0.5 (pow=4, f=0.000001, width=20, frames=1600): 0.0000006298015155592009
-    //   0.5 (pow=4, f=0.000001, width=20, frames=2400): 0.0000003110877898243663
-    //   pow=2 and pow=3 look nearly interchangeable in perf, with pow=4 slightly worse.
-    //   Given higher stability for pow=3, it pushes me in that direction
-    // What about ordinary conductivity?
-    //   Uniaxial conductors do nothing
-    // non-axial conductors (measured over full volume -- not just sim volume)
-    //   0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=20): 1.0150542017357447
-    //   0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=40): 1.0144968836020802
-    //   0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=80): 1.001249693707569
-    //   0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=120): 0.9703627391410313
-    //   0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=240): 0.9839347439326915
-    //   0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=400): 0.9710971333858839
-    //   0,0.000000000001 (pow=2, f=0.000001, width=20, frames=20): 1.015054152058065
-    //   0,0.000000000001 (pow=2, f=0.000001, width=20, frames=40): 1.0139965206685178
-    //   0,0.000000000001 (pow=2, f=0.000001, width=20, frames=80): 0.8611234324272511
-    //   0,0.000000000001 (pow=2, f=0.000001, width=20, frames=120): 0.38724876605299674
-    //   0,0.000000000001 (pow=2, f=0.000001, width=20, frames=240): 0.010452374686360136
-    //   0,0.000000000001 (pow=2, f=0.000001, width=20, frames=400): 0.0006325928742141608
-    //   0,0.000000001 (pow=2, f=0.000001, width=20, frames=20): 1.0150333781336882
-    //   0,0.000000001 (pow=2, f=0.000001, width=20, frames=40): 0.9834360538048873
-    //   0,0.000000001 (pow=2, f=0.000001, width=20, frames=80): 0.5072413415421974
-    //   0,0.000000001 (pow=2, f=0.000001, width=20, frames=120): 0.10345998021237998
-    //   0,0.000000001 (pow=2, f=0.000001, width=20, frames=240): 0.007519161562708659
-    //   0,0.000000001 (pow=2, f=0.000001, width=20, frames=400): 0.0006070229440826888
-    //   0,0.000001 (pow=2, f=0.000001, width=20, frames=20): 1.0150056763216164
-    //   0,0.000001 (pow=2, f=0.000001, width=20, frames=40): 1.0123907067781637
-    //   0,0.000001 (pow=2, f=0.000001, width=20, frames=80): 1.0095377475794036
-    //   0,0.000001 (pow=2, f=0.000001, width=20, frames=120): 1.0027226600640793
-    //   0,0.000001 (pow=2, f=0.000001, width=20, frames=240): 1.002880617201309
-    //   0,0.000001 (pow=2, f=0.000001, width=20, frames=400): 0.9817776859704778
-    //   0,0.001 (pow=2, f=0.000001, width=20, frames=20): 1.0150056260987463
-    //   0,0.001 (pow=2, f=0.000001, width=20, frames=40): 1.0127202910529511
-    //   0,0.001 (pow=2, f=0.000001, width=20, frames=80): 1.0141889645709163
-    //   0,0.001 (pow=2, f=0.000001, width=20, frames=120): 1.012712629869813
-    //   0,0.001 (pow=2, f=0.000001, width=20, frames=240): 1.0225762750160987
-    //   0,0.001 (pow=2, f=0.000001, width=20, frames=400): 1.0158169138074233
-    //   0,1 (pow=2, f=0.000001, width=20, frames=20): 1.0150056260488272
-    //   0,1 (pow=2, f=0.000001, width=20, frames=40): 1.0127206223077903
-    //   0,1 (pow=2, f=0.000001, width=20, frames=80): 1.01419363999915
-    //   0,1 (pow=2, f=0.000001, width=20, frames=120): 1.0127226953239867
-    //   0,1 (pow=2, f=0.000001, width=20, frames=240): 1.0225963209694455
-    //   0,1 (pow=2, f=0.000001, width=20, frames=400): 1.0158520285873147
-    //   0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=20): 1.0150542017810857
-    //   0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=40): 1.014497284230248
-    //   0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=80): 1.0013349322815948
-    //   0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=120): 0.9707653838346726
-    //   0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=240): 0.98542720202683
-    //   0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=400): 0.9736668760490382
-    //   0,0.000000000001 (pow=3, f=0.000001, width=20, frames=20): 1.0150541972999592
-    //   0,0.000000000001 (pow=3, f=0.000001, width=20, frames=40): 1.0143834319294165
-    //   0,0.000000000001 (pow=3, f=0.000001, width=20, frames=80): 0.9087702844329181
-    //   0,0.000000000001 (pow=3, f=0.000001, width=20, frames=120): 0.4641021861485614
-    //   0,0.000000000001 (pow=3, f=0.000001, width=20, frames=240): 0.025542565246607578
-    //   0,0.000000000001 (pow=3, f=0.000001, width=20, frames=400): 0.0024678463432975324
-    //   0,0.000000001 (pow=3, f=0.000001, width=20, frames=20): 1.0150507306076173
-    //   0,0.000000001 (pow=3, f=0.000001, width=20, frames=40): 0.9973368290451762
-    //   0,0.000000001 (pow=3, f=0.000001, width=20, frames=80): 0.5359561255382069
-    //   0,0.000000001 (pow=3, f=0.000001, width=20, frames=120): 0.10107229161449789
-    //   0,0.000000001 (pow=3, f=0.000001, width=20, frames=240): 0.0018726822600725027
-    //   0,0.000000001 (pow=3, f=0.000001, width=20, frames=400): 0.0001395247365920271
-    //   0,0.000001 (pow=3, f=0.000001, width=20, frames=20): 1.015006719517991
-    //   0,0.000001 (pow=3, f=0.000001, width=20, frames=40): 1.0067208901320701
-    //   0,0.000001 (pow=3, f=0.000001, width=20, frames=80): 0.9295573543881088
-    //   0,0.000001 (pow=3, f=0.000001, width=20, frames=120): 0.8372562924822498
-    //   0,0.000001 (pow=3, f=0.000001, width=20, frames=240): 0.7109091146305759
-    //   0,0.000001 (pow=3, f=0.000001, width=20, frames=400): 0.5405933200528408
-    //   0,0.001 (pow=3, f=0.000001, width=20, frames=20): 1.015005627048278
-    //   0,0.001 (pow=3, f=0.000001, width=20, frames=40): 1.0127139915480314
-    //   0,0.001 (pow=3, f=0.000001, width=20, frames=80): 1.0141000517606538
-    //   0,0.001 (pow=3, f=0.000001, width=20, frames=120): 1.0125212238134818
-    //   0,0.001 (pow=3, f=0.000001, width=20, frames=240): 1.0221951620616994
-    //   0,0.001 (pow=3, f=0.000001, width=20, frames=400): 1.0151495300965783
-    //   0,1 (pow=3, f=0.000001, width=20, frames=20): 1.0150056260497768
-    //   0,1 (pow=3, f=0.000001, width=20, frames=40): 1.012720616007617
-    //   0,1 (pow=3, f=0.000001, width=20, frames=80): 1.0141935510766382
-    //   0,1 (pow=3, f=0.000001, width=20, frames=120): 1.0127225038875007
-    //   0,1 (pow=3, f=0.000001, width=20, frames=240): 1.0225959397081388
-    //   0,1 (pow=3, f=0.000001, width=20, frames=400): 1.0158513607157937
-    //   It appears here that the cubic roll-off is best, and very low conductivities are required.
-    //
-    // Isotropic conductor, energy measured only within the sim area:
-    //   The optimized case (0.5*EPS0/timestep * x^3) is almost IDENTICAL to the
-    //   optimized stretched-coordinate version.
-    //   0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=120): 0.07433102240945513
-    //   0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=400): 0.20431918328907037
-    //   0,0.000000000001 (pow=2, f=0.000001, width=20, frames=120): 0.04851013262210713
-    //   0,0.000000000001 (pow=2, f=0.000001, width=20, frames=400): 0.00018366232272528987
-    //   0,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=120): 0.047101055930619994
-    //   0,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=400): 0.000007399479477711689
-    //   0,0.000000000008854187812813 (pow=2, f=0.000001, width=20, frames=120): 0.047132676278627536
-    //   0,0.000000000008854187812813 (pow=2, f=0.000001, width=20, frames=400): 0.000008105989741898135
-    //   0,0.00000000001 (pow=2, f=0.000001, width=20, frames=120): 0.047145523858393434
-    //   0,0.00000000001 (pow=2, f=0.000001, width=20, frames=400): 0.000008327266269591764
-    //   0,0.0000000001 (pow=2, f=0.000001, width=20, frames=120): 0.05005311352870215
-    //   0,0.0000000001 (pow=2, f=0.000001, width=20, frames=400): 0.00003665711687010239
-    //   0,0.000000001 (pow=2, f=0.000001, width=20, frames=120): 0.0808038989604745
-    //   0,0.000000001 (pow=2, f=0.000001, width=20, frames=400): 0.0005636469832522345
-    //   0,0.00000001 (pow=2, f=0.000001, width=20, frames=120): 0.42090062694288544
-    //   0,0.00000001 (pow=2, f=0.000001, width=20, frames=400): 0.07558396270665715
-    //   0,0.000001 (pow=2, f=0.000001, width=20, frames=120): 0.9456301791158588
-    //   0,0.000001 (pow=2, f=0.000001, width=20, frames=400): 0.9456217562930543
-    //   0,0.001 (pow=2, f=0.000001, width=20, frames=120): 0.9549600788853474
-    //   0,0.001 (pow=2, f=0.000001, width=20, frames=400): 0.9782784298162882
-    //   0,1 (pow=2, f=0.000001, width=20, frames=120): 0.9549694776242036
-    //   0,1 (pow=2, f=0.000001, width=20, frames=400): 0.9783121245194134
-    //   0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=120): 0.07435108171392021
-    //   0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=400): 0.20479144240774633
-    //   0,0.000000000001 (pow=3, f=0.000001, width=20, frames=120): 0.050005777221700895
-    //   0,0.000000000001 (pow=3, f=0.000001, width=20, frames=400): 0.0006548443359188922
-    //   0,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=120): 0.0471097820852083
-    //   0,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=400): 0.00000726683693079002
-    //   0,0.000000000008854187812813 (pow=3, f=0.000001, width=20, frames=120): 0.04711795151894901
-    //   0,0.000000000008854187812813 (pow=3, f=0.000001, width=20, frames=400): 0.000007527865420649209
-    //   0,0.00000000001 (pow=3, f=0.000001, width=20, frames=120): 0.0471214615521427
-    //   0,0.00000000001 (pow=3, f=0.000001, width=20, frames=400): 0.000007561968840324101
-    //   0,0.0000000001 (pow=3, f=0.000001, width=20, frames=120): 0.04775494018209187
-    //   0,0.0000000001 (pow=3, f=0.000001, width=20, frames=400): 0.00001655968038405813
-    //   0,0.000000001 (pow=3, f=0.000001, width=20, frames=120): 0.05548225210925576
-    //   0,0.000000001 (pow=3, f=0.000001, width=20, frames=400): 0.00011973516700797696
-    //   0,0.00000001 (pow=3, f=0.000001, width=20, frames=120): 0.09859866928724702
-    //   0,0.00000001 (pow=3, f=0.000001, width=20, frames=400): 0.0010788775694009376
-    //   0,0.000001 (pow=3, f=0.000001, width=20, frames=120): 0.7905518369602526
-    //   0,0.000001 (pow=3, f=0.000001, width=20, frames=400): 0.5217962235754597
-    //   0,0.001 (pow=3, f=0.000001, width=20, frames=120): 0.9547813505519078
-    //   0,0.001 (pow=3, f=0.000001, width=20, frames=400): 0.9776380394998598
-    //   0,1 (pow=3, f=0.000001, width=20, frames=120): 0.9549692988681137
-    //   0,1 (pow=3, f=0.000001, width=20, frames=400): 0.978311483657099
-    // With both PML and conductor boundary:
-    //   0.5,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=120): 0.04709454626708049
-    //   0.5,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=400): 0.000007626399231684735
-    //   0.5,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=1200): 0.0000008116632120088743
-    //   0.5,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=2400): 0.00000032497678753300923
-    //   0.5,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=120): 0.04709779630205149
-    //   0.5,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=400): 0.000007132448872463483
-    //   0.5,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=1200): 0.000000710451109532798
-    //   0.5,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=2400): 0.0000002515070714149805
-    //   This is basically no change from JUST PML or JUST conductors
-    //   Maybe I should be trying to vary the width: maybe PML works more effectively for narrower
-    //   boundaries than do conductors?
-}
--- a/crates/coremem/src/driver.rs
+++ b/crates/coremem/src/driver.rs
@@ -1,15 +1,11 @@
 use crate::geom::{Coord, Index, Meters, Region};
 use crate::mat;
 use crate::meas::{self, AbstractMeasurement};
-use crate::real::{self, Real};
+use crate::real::Real;
 use crate::render::{self, MultiRenderer, Renderer};
-use crate::sim::{GenericSim, MaterialSim, SampleableSim};
-use crate::sim::legacy::{self, SimState};
-use crate::sim::legacy::mat::Pml;
+use crate::sim::AbstractSim;
 use crate::sim::units::{Frame, Time};
-use crate::sim::spirv::SpirvSim;
-use crate::stim::AbstractStimulus;
-use crate::types::vec::Vec3;
+use crate::stim::{self, AbstractStimulus};

 use log::{info, trace};
 use serde::{Deserialize, Serialize};
@@ -19,18 +15,19 @@ use std::sync::mpsc::{sync_channel, SyncSender, Receiver};
 use std::time::{Duration, Instant};
 use threadpool::ThreadPool;

-pub struct Driver<S=SimState> {
-    pub state: S,
+pub struct Driver<S> {
+    state: S,
    renderer: Arc<MultiRenderer<S>>,
    // TODO: use Rayon's thread pool?
    render_pool: ThreadPool,
    render_channel: (SyncSender<()>, Receiver<()>),
+    // TODO: split out into a Diagnostics struct
    time_spent_stepping: Duration,
    time_spent_on_stimuli: Duration,
    time_spent_prepping_render: Duration,
    time_spent_blocked_on_render: Duration,
    time_spent_rendering: Arc<Mutex<Duration>>,
-    measurements: Vec<Box<dyn AbstractMeasurement>>,
+    measurements: Vec<Arc<dyn AbstractMeasurement<S>>>,
    stimuli: StimuliAdapter,
    start_time: Instant,
    last_diag_time: Instant,
@@ -38,24 +35,8 @@ pub struct Driver<S=SimState> {
    sim_end_time: Option<Frame>,
 }

-pub type CpuDriver<R=real::R32, M=legacy::mat::GenericMaterial<R>> = Driver<SimState<R, M>>;
-pub type SpirvDriver<M=mat::FullyGenericMaterial<f32>> = Driver<SpirvSim<M>>;
-
-impl<R: Real, M: Default> Driver<SimState<R, M>> {
-    pub fn new<C: Coord>(size: C, feature_size: f32) -> Self {
-        Self::new_with_state(SimState::new(size.to_index(feature_size), feature_size))
-    }
-}
-
-impl<M: Default + 'static> SpirvDriver<M>
-{
-    pub fn new_spirv<C: Coord>(size: C, feature_size: f32) -> Self {
-        Self::new_with_state(SpirvSim::new(size.to_index(feature_size), feature_size))
-    }
-}
-
-impl<S> Driver<S> {
-    pub fn new_with_state(state: S) -> Self {
+impl<S: AbstractSim> Driver<S> {
+    pub fn new(state: S) -> Self {
        Self {
            state,
            renderer: Arc::new(MultiRenderer::new()),
@@ -67,10 +48,10 @@ impl<S> Driver<S> {
            time_spent_blocked_on_render: Default::default(),
            time_spent_rendering: Default::default(),
            measurements: vec![
-                Box::new(meas::Time),
-                Box::new(meas::Meta),
-                Box::new(meas::Energy::world()),
-                Box::new(meas::Power::world()),
+                Arc::new(meas::Time),
+                Arc::new(meas::Meta),
+                Arc::new(meas::Energy::world()),
+                Arc::new(meas::Power::world()),
            ],
            stimuli: StimuliAdapter::new(),
            start_time: Instant::now(),
@@ -78,15 +59,13 @@ impl<S> Driver<S> {
            sim_end_time: None,
        }
    }
-}

-impl<S> Driver<S> {
    pub fn add_stimulus<Stim: AbstractStimulus + 'static>(&mut self, s: Stim) {
        self.stimuli.push(Box::new(s))
    }

-    pub fn add_measurement<Meas: AbstractMeasurement + 'static>(&mut self, m: Meas) {
-        self.measurements.push(Box::new(m));
+    pub fn add_measurement<Meas: AbstractMeasurement<S> + 'static>(&mut self, m: Meas) {
+        self.measurements.push(Arc::new(m));
    }

    pub fn set_steps_per_stim(&mut self, steps_per_stim: u64) {
@@ -94,16 +73,20 @@ impl<S> Driver<S> {
    }
 }

-impl<S: MaterialSim> Driver<S> {
+impl<S: AbstractSim> Driver<S> {
    pub fn fill_region<Reg: Region, M: Into<S::Material> + Clone>(&mut self, region: &Reg, mat: M) {
        self.state.fill_region(region, mat);
    }
-    pub fn test_region_filled<Reg: Region, M: Into<S::Material> + Clone>(&mut self, region: &Reg, mat: M) -> bool {
+    pub fn test_region_filled<Reg: Region, M>(&mut self, region: &Reg, mat: M) -> bool
+    where
+        M: Into<S::Material> + Clone,
+        S::Material: PartialEq
+    {
        self.state.test_region_filled(region, mat)
    }
 }

-impl<S: SampleableSim> Driver<S> {
+impl<S: AbstractSim> Driver<S> {
    pub fn size(&self) -> Index {
        self.state.size()
    }
@@ -115,11 +98,7 @@ impl<S: SampleableSim> Driver<S> {
    }
 }

-impl<S: SampleableSim + Send + Sync + 'static> Driver<S> {
-    pub fn dyn_state(&mut self) -> &mut dyn SampleableSim {
-        &mut self.state
-    }
-
+impl<S: AbstractSim + 'static> Driver<S> {
    fn add_renderer<Rend: Renderer<S> + 'static>(
        &mut self, renderer: Rend, name: &str, step_frequency: u64, frame_limit: Option<u64>
    ) {
@@ -142,31 +121,27 @@ impl<S: SampleableSim + Send + Sync + 'static> Driver<S> {
    }
 }

-impl<S: SampleableSim + Send + Sync + Serialize + 'static> Driver<S> {
+impl<S: AbstractSim + Serialize + 'static> Driver<S> {
    pub fn add_serializer_renderer(&mut self, out_base: &str, step_frequency: u64, frame_limit: Option<u64>) {
        let fmt_str = format!("{out_base}{{step_no}}.bc", out_base=out_base);
-        self.add_renderer(render::SerializerRenderer::new_static(&*fmt_str), &*fmt_str, step_frequency, frame_limit);
+        self.add_renderer(render::SerializerRenderer::new_generic(&*fmt_str), &*fmt_str, step_frequency, frame_limit);
    }
 }

-impl<S: SampleableSim + Send + Sync + Serialize + for<'a> Deserialize<'a> + 'static> Driver<S> {
+impl<S: AbstractSim + Send + Sync + Serialize + for<'a> Deserialize<'a> + 'static> Driver<S> {
    /// instruct the driver to periodically save the simulation state to the provided path.
    /// also attempts to load an existing state file, returning `true` on success.
    pub fn add_state_file(&mut self, state_file: &str, snapshot_frequency: u64) -> bool {
        let ser = render::SerializerRenderer::new(state_file);
-        let loaded = match ser.try_load() {
-            Some(state) => {
-                self.state = state.state;
-                true
-            },
-            None => false,
-        };
+        let loaded = ser.try_load().map(|s| {
+            self.state = s.state;
+        }).is_some();
        self.add_renderer(ser, state_file, snapshot_frequency, None);
        loaded
    }
 }

-impl<S: GenericSim + Clone + Default + Send + Sync + 'static> Driver<S> {
+impl<S: AbstractSim + Clone + Default + Send + 'static> Driver<S> {
    fn render(&mut self) {
        let prep_start = Instant::now();
        let their_state = self.state.clone();
@@ -179,7 +154,8 @@ impl<S: GenericSim + Clone + Default + Send + Sync + 'static> Driver<S> {
            sender.send(()).unwrap();
            trace!("render begin");
            let start_time = Instant::now();
-            renderer.render(&their_state, &*their_measurements, Default::default());
+            let meas: Vec<&dyn AbstractMeasurement<S>> = their_measurements.iter().map(|m| &**m).collect();
+            renderer.render(&their_state, &*meas, Default::default());
            *time_spent_rendering.lock().unwrap() += start_time.elapsed();
            trace!("render end");
        });
@@ -263,7 +239,7 @@ impl<S: GenericSim + Clone + Default + Send + Sync + 'static> Driver<S> {
        let sim_end_time = sim_end_time.to_frame(self.state.timestep());
        self.sim_end_time = Some(sim_end_time);
        let mut stepped = false;
-        while self.dyn_state().step_no() < *sim_end_time {
+        while self.state.step_no() < *sim_end_time {
            self.step_multiple(100);
            stepped = true;
        }
@@ -276,18 +252,7 @@ impl<S: GenericSim + Clone + Default + Send + Sync + 'static> Driver<S> {
    }
 }

-impl<S: MaterialSim> Driver<S> {
-    pub fn add_pml_boundary<C: Coord, R: Real>(&mut self, thickness: C)
-        where S::Material: From<Pml<R>>
-    {
-        let timestep = self.state.timestep();
-        self.state.fill_boundary_using(thickness, |boundary_ness| {
-            let b = boundary_ness.elem_pow(3.0);
-            let conductivity = b * (0.5 / timestep);
-            Pml::new(conductivity)
-        });
-    }
-
+impl<S: AbstractSim> Driver<S> {
    pub fn add_classical_boundary<C: Coord>(&mut self, thickness: C)
        where S::Material: From<mat::IsomorphicConductor<f32>>
    {
@@ -321,7 +286,7 @@ struct StimuliAdapter {
 }

 impl AbstractStimulus for StimuliAdapter {
-    fn at(&self, t_sec: f32, pos: Meters) -> (Vec3<f32>, Vec3<f32>) {
+    fn at(&self, t_sec: f32, pos: Meters) -> stim::Fields {
        self.stim.at(t_sec, pos)
        // TODO: remove this stuff  (here only for testing)
        /*
--- a/crates/coremem/src/geom/line.rs
+++ b/crates/coremem/src/geom/line.rs
@@ -1,5 +1,5 @@
-use coremem_types::real::Real;
-use coremem_types::vec::Vec2;
+use coremem_cross::real::Real;
+use coremem_cross::vec::Vec2;

 use std::ops::Add;

--- a/crates/coremem/src/geom/polygon.rs
+++ b/crates/coremem/src/geom/polygon.rs
@@ -1,6 +1,6 @@
 use crate::geom::Line2d;
-use coremem_types::real::Real;
-use coremem_types::vec::Vec2;
+use coremem_cross::real::Real;
+use coremem_cross::vec::Vec2;

 #[derive(Clone, Debug, PartialEq)]
 pub struct Polygon2d<R> {
--- a/crates/coremem/src/geom/region/primitives.rs
+++ b/crates/coremem/src/geom/region/primitives.rs
@@ -1,6 +1,6 @@
 use crate::geom::Meters;
-use coremem_types::real::Real as _;
-use coremem_types::vec::{Vec2, Vec3};
+use coremem_cross::real::Real as _;
+use coremem_cross::vec::{Vec2, Vec3};

 use serde::{Serialize, Deserialize};
 use std::fmt::{self, Display};
@@ -87,7 +87,7 @@ impl Region for Torus {
        // 2. Find the point `q` on the circle which is nearest to `p`.
        // 3. Consider the distance from `p` to `q`.
        let rel_p = *p - *self.center;
-        let p_on_plane = rel_p - self.normal.with_mag(self.normal.dot(rel_p));
+        let p_on_plane = rel_p - self.normal.with_mag(self.normal.dot(rel_p)).unwrap();
        let q = if p_on_plane == Vec3::zero() {
            // avoid division by zero.
            // The point is precisely on the axis of the torus.
@@ -95,9 +95,9 @@ impl Region for Torus {
            // and they all give the same answer.
            // Such a point is given by rotating the normal axis by 90 degrees in ANY DIRECTION
            let off_axis = self.normal.arbitrary_orthogonal_vector();
-            off_axis.with_mag(self.major_rad)
+            off_axis.with_mag(self.major_rad).unwrap()
        } else {
-            p_on_plane.with_mag(self.major_rad)
+            p_on_plane.with_mag(self.major_rad).unwrap()
        };
        let distance_to_circle_sq = rel_p.distance_sq(q);
        distance_to_circle_sq < self.minor_rad * self.minor_rad
--- a/crates/coremem/src/geom/units.rs
+++ b/crates/coremem/src/geom/units.rs
@@ -1,5 +1,5 @@
-use coremem_types::real::ToFloat;
-use coremem_types::vec::{Vec3, Vec3u};
+use coremem_cross::real::ToFloat;
+use coremem_cross::vec::{Vec3, Vec3u};
 use serde::{Serialize, Deserialize};
 use std::fmt::{self, Display};
 use std::cmp::Ordering;
--- a/crates/coremem/src/lib.rs
+++ b/crates/coremem/src/lib.rs
@@ -13,13 +13,12 @@ pub mod meas;
 pub mod render;
 pub mod sim;
 pub mod stim;
-pub mod util;

 pub use driver::*;
 pub use sim::*;
-pub use coremem_types as types;
-pub use coremem_types::real;
-pub use coremem_types::mat;
+pub use coremem_cross as cross;
+pub use coremem_cross::real;
+pub use coremem_cross::mat;

 // Some things to keep in mind:
 // B = mu_r*H + M
--- a/crates/coremem/src/meas.rs
+++ b/crates/coremem/src/meas.rs
@@ -1,21 +1,22 @@
 use crate::geom::{Meters, Region, Torus, WorldRegion};
 use crate::real::{Real as _, ToFloat as _};
-use crate::types::vec::Vec3;
-use crate::sim::SampleableSim;
-use dyn_clone::{self, DynClone};
+use crate::cross::vec::Vec3;
+use crate::sim::AbstractSim;
 use indexmap::IndexMap;
 use serde::{Serialize, Deserialize};

-// TODO: remove this Clone and Send requirement? Have Measurements be shared by-reference across
-// threads? i.e. Sync, and no Clone
-#[typetag::serde(tag = "type")]
-pub trait AbstractMeasurement: Send + Sync + DynClone {
-    fn eval(&self, state: &dyn SampleableSim) -> String;
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String>;
+pub trait AbstractMeasurement<S>: Send + Sync {
+    fn eval(&self, state: &S) -> String;
+    fn key_value(&self, state: &S) -> IndexMap<String, String>;
 }
-dyn_clone::clone_trait_object!(AbstractMeasurement);

-pub fn eval_multiple_kv(state: &dyn SampleableSim, meas: &[Box<dyn AbstractMeasurement>]) -> IndexMap<String, String> {
+pub fn as_dyn_measurements<S, M: AbstractMeasurement<S>>(meas: &[M]) -> Vec<&dyn AbstractMeasurement<S>> {
+    meas.into_iter().map(|m| m as &dyn AbstractMeasurement<S>).collect()
+}
+
+
+/// create one IndexMap out of several measurements
+pub fn eval_multiple_kv<S>(state: &S, meas: &[&dyn AbstractMeasurement<S>]) -> IndexMap<String, String> {
    let mut r = IndexMap::new();
    for m in meas {
        let other = m.key_value(state);
@@ -24,15 +25,93 @@ pub fn eval_multiple_kv(state: &dyn SampleableSim, meas: &[Box<dyn AbstractMeasu
    r
 }

+pub fn eval_to_vec<S>(state: &S, meas: &[&dyn AbstractMeasurement<S>]) -> Vec<Evaluated> {
+    eval_multiple_kv(state, meas).into_iter().map(|(k, v)| {
+        Evaluated::new(k, v)
+    }).collect()
+}
+
+enum SiScale {
+    Pico,
+    Nano,
+    Micro,
+    Milli,
+    Unit,
+    Kilo,
+    Mega,
+    Giga,
+    Terra,
+}
+
+impl SiScale {
+    fn for_value(v: f32) -> Self {
+        use SiScale::*;
+        match v {
+            v if v < 1e-12 => Unit,
+            v if v < 1e-9  => Pico,
+            v if v < 1e-6  => Nano,
+            v if v < 1e-3  => Micro,
+            v if v < 1e0   => Milli,
+            v if v < 1e3   => Unit,
+            v if v < 1e6   => Kilo,
+            v if v < 1e9   => Mega,
+            v if v < 1e12  => Giga,
+            v if v < 1e15  => Terra,
+            _ => Unit
+        }
+    }
+
+    /// return the numerical scale of this prefix.
+    /// e.g. `scale(&Pico) -> 1e-12
+    fn scale(&self) -> f32 {
+        use SiScale::*;
+        match *self {
+            Pico =>  1e-12,
+            Nano =>  1e-9,
+            Micro => 1e-6,
+            Milli => 1e-3,
+            Unit =>  1.0,
+            Kilo =>  1e3,
+            Mega =>  1e6,
+            Giga =>  1e9,
+            Terra => 1e12,
+        }
+    }
+
+    /// return the short string for this scale.
+    /// e.g. `shortcode(Pico) -> "p"`
+    fn shortcode(&self) -> &'static str {
+        use SiScale::*;
+        match *self {
+            Pico =>  "p",
+            Nano =>  "n",
+            Micro => "u",
+            Milli => "m",
+            Unit =>  "",
+            Kilo =>  "k",
+            Mega =>  "M",
+            Giga =>  "G",
+            Terra => "T",
+        }
+    }
+
+    /// format `v`, with the provided unit.
+    /// e.g. `format_short(1234, "A") -> "1.23 kA"
+    fn format_short(v: f32, unit: &str) -> String {
+        let si = SiScale::for_value(v);
+        let scaled = si.scale() * v;
+        format!("{:.2} {}{}", scaled, si.shortcode(), unit)
+    }
+}
+
 #[derive(Clone, Serialize, Deserialize)]
 pub struct Time;

-#[typetag::serde]
-impl AbstractMeasurement for Time {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
-        format!("{:.3e}s (step {})", state.time(), state.step_no())
+impl<S: AbstractSim> AbstractMeasurement<S> for Time {
+    fn eval(&self, state: &S) -> String {
+        format!("{} (step {})", SiScale::format_short(state.time(), "s"), state.step_no())
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        [
            ("step".to_string(), state.step_no().to_string()),
            ("time".to_string(), state.time().to_string()),
@@ -43,12 +122,11 @@ impl AbstractMeasurement for Time {
 #[derive(Clone, Serialize, Deserialize)]
 pub struct Meta;

-#[typetag::serde]
-impl AbstractMeasurement for Meta {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for Meta {
+    fn eval(&self, state: &S) -> String {
        format!("{}x{}x{} feat: {:.1e}m", state.width(), state.height(), state.depth(), state.feature_size())
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        [
            ("width".to_string(), state.width().to_string()),
            ("height".to_string(), state.height().to_string()),
@@ -58,23 +136,31 @@ impl AbstractMeasurement for Meta {
    }
 }

+/// some measurement which has already been evaluated.
+/// this is used particularly if we need to monomorphize a measurement (e.g. for serialization)
+/// and know it won't be applied to a new/different state.
 #[derive(Clone, Serialize, Deserialize)]
-pub struct Label(pub String);
+pub struct Evaluated(String, String);

-impl Label {
-    pub fn new<S: Into<String>>(s: S) -> Self {
-        Self(s.into())
+impl Evaluated {
+    pub fn new<S1: Into<String>, S2: Into<String>>(key: S1, value: S2) -> Self {
+        Self(key.into(), value.into())
+    }
+    pub fn key(&self) -> &str {
+        &*self.0
+    }
+    pub fn value(&self) -> &str {
+        &*self.1
    }
 }

-#[typetag::serde]
-impl AbstractMeasurement for Label {
-    fn eval(&self, _state: &dyn SampleableSim) -> String {
-        self.0.clone()
+impl<S> AbstractMeasurement<S> for Evaluated {
+    fn eval(&self, _state: &S) -> String {
+        format!("{}: {}", self.0, self.1)
    }
-    fn key_value(&self, _state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, _state: &S) -> IndexMap<String, String> {
        [
-            (self.0.clone(), self.0.clone()),
+            (self.0.clone(), self.1.clone()),
        ].into_iter().collect()
    }
 }
@@ -93,22 +179,21 @@ impl Volume {
        }
    }
    /// Returns the volume of the region, in units of um^3
-    fn data(&self, state: &dyn SampleableSim) -> f32 {
+    fn data<S: AbstractSim>(&self, state: &S) -> f32 {
        let feat_um = state.feature_size() as f64 * 1e6;
        
        (state.volume_of_region(&*self.region) as f64 * feat_um * feat_um * feat_um) as f32
    }
 }

-#[typetag::serde]
-impl AbstractMeasurement for Volume {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for Volume {
+    fn eval(&self, state: &S) -> String {
        format!("Vol({}): {:.2e} um^3",
            self.name,
            self.data(state),
        )
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        [
            (format!("Vol({})", self.name), self.data(state).to_string()),
        ].into_iter().collect()
@@ -128,7 +213,7 @@ impl Current {
            region: Box::new(r)
        }
    }
-    fn data(&self, state: &dyn SampleableSim) -> (f32, Vec3<f32>) {
+    fn data<S: AbstractSim>(&self, state: &S) -> (f32, Vec3<f32>) {
        let FieldSample(volume, current_mag, current_vec) = state.map_sum_over_enumerated(&*self.region, |coord: Meters, _cell| {
            let current = state.current(coord);
            FieldSample(1, current.mag().cast(), current.cast())
@@ -184,16 +269,15 @@ impl std::iter::Sum for FieldSamples<[FieldSample; 3]> {
    }
 }

-#[typetag::serde]
-impl AbstractMeasurement for Current {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for Current {
+    fn eval(&self, state: &S) -> String {
        let (mean_current_mag, mean_current_vec) = self.data(state);
        format!("I/cell({}): {:.2e} {:.2e}",
            self.name,
            mean_current_mag,
            mean_current_vec)
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let (mean_current_mag, mean_current_vec) = self.data(state);
        [
            (format!("Imag/cell({})", self.name), mean_current_mag.to_string()),
@@ -216,7 +300,7 @@ impl CurrentLoop {
            region: r,
        }
    }
-    fn data(&self, state: &dyn SampleableSim) -> f32 {
+    fn data<S: AbstractSim>(&self, state: &S) -> f32 {
        let FieldSample(volume, directed_current, _current_vec) = state.map_sum_over_enumerated(&self.region, |coord: Meters, _cell| {
            let normal = self.region.axis();
            let to_coord = *coord - *self.region.center();
@@ -232,13 +316,12 @@ impl CurrentLoop {
    }
 }

-#[typetag::serde]
-impl AbstractMeasurement for CurrentLoop {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for CurrentLoop {
+    fn eval(&self, state: &S) -> String {
        let cross_sectional_current = self.data(state);
        format!("I({}): {:.2e}", self.name, cross_sectional_current)
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let cross_sectional_current = self.data(state);
        [
            (format!("I({})", self.name), cross_sectional_current.to_string()),
@@ -261,7 +344,7 @@ impl MagneticLoop {
            region: r,
        }
    }
-    fn data(&self, state: &dyn SampleableSim) -> (f32, f32, f32) {
+    fn data<S: AbstractSim>(&self, state: &S) -> (f32, f32, f32) {
        let FieldSamples([
            FieldSample(volume, directed_m, _m_vec),
            FieldSample(_, directed_b, _b_vec),
@@ -301,9 +384,8 @@ impl MagneticLoop {
    }
 }

-#[typetag::serde]
-impl AbstractMeasurement for MagneticLoop {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for MagneticLoop {
+    fn eval(&self, state: &S) -> String {
        let (mean_directed_m, mean_directed_b, mean_directed_h) = self.data(state);
        format!(
            "M({}): {:.2e}; B({}): {:.2e}; H({}): {:.2e}",
@@ -312,7 +394,7 @@ impl AbstractMeasurement for MagneticLoop {
            self.name, mean_directed_h,
        )
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let (mean_directed_m, mean_directed_b, mean_directed_h) = self.data(state);
        [
            (format!("M({})", self.name), mean_directed_m.to_string()),
@@ -336,7 +418,7 @@ impl MagneticFlux {
            region: Box::new(r)
        }
    }
-    fn data(&self, state: &dyn SampleableSim) -> Vec3<f32> {
+    fn data<S: AbstractSim>(&self, state: &S) -> Vec3<f32> {
        let FieldSample(volume, _directed_mag, mag_vec) = state.map_sum_over(&*self.region, |cell| {
            let b = cell.b();
            let mag = b.mag();
@@ -347,13 +429,12 @@ impl MagneticFlux {
    }
 }

-#[typetag::serde]
-impl AbstractMeasurement for MagneticFlux {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for MagneticFlux {
+    fn eval(&self, state: &S) -> String {
        let mean_mag = self.data(state);
        format!("Bavg({}): {:.2e}", self.name, mean_mag)
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let mean_mag = self.data(state);
        [
            (format!("Bavg({})", self.name), mean_mag.to_string()),
@@ -375,7 +456,7 @@ impl Magnetization {
            region: Box::new(r)
        }
    }
-    fn data(&self, state: &dyn SampleableSim) -> Vec3<f32> {
+    fn data<S: AbstractSim>(&self, state: &S) -> Vec3<f32> {
        let FieldSample(volume, _directed_mag, mag_vec) = state.map_sum_over(&*self.region, |cell| {
            let m = cell.m();
            let mag = m.mag();
@@ -386,13 +467,12 @@ impl Magnetization {
    }
 }

-#[typetag::serde]
-impl AbstractMeasurement for Magnetization {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for Magnetization {
+    fn eval(&self, state: &S) -> String {
        let mean_mag = self.data(state);
        format!("Mavg({}): {:.2e}", self.name, mean_mag)
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let mean_mag = self.data(state);
        [
            (format!("Mavg({})", self.name), mean_mag.to_string()),
@@ -408,13 +488,12 @@ fn loc(v: Meters) -> String {
 #[derive(Clone, Serialize, Deserialize)]
 pub struct MagnetizationAt(pub Meters);

-#[typetag::serde]
-impl AbstractMeasurement for MagnetizationAt {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for MagnetizationAt {
+    fn eval(&self, state: &S) -> String {
        let m = state.sample(self.0).m();
        format!("M{}: {:.2e}", loc(self.0), m)
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let m = state.sample(self.0).m();
        [
            (format!("M{}", loc(self.0)), m.to_string()),
@@ -426,13 +505,12 @@ impl AbstractMeasurement for MagnetizationAt {
 #[derive(Clone, Serialize, Deserialize)]
 pub struct MagneticFluxAt(pub Meters);

-#[typetag::serde]
-impl AbstractMeasurement for MagneticFluxAt {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for MagneticFluxAt {
+    fn eval(&self, state: &S) -> String {
        let b = state.sample(self.0).b();
        format!("B{}: {:.2e}", loc(self.0), b)
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let b = state.sample(self.0).b();
        [
            (format!("B{}", loc(self.0)), b.to_string()),
@@ -444,13 +522,12 @@ impl AbstractMeasurement for MagneticFluxAt {
 #[derive(Clone, Serialize, Deserialize)]
 pub struct MagneticStrengthAt(pub Meters);

-#[typetag::serde]
-impl AbstractMeasurement for MagneticStrengthAt {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for MagneticStrengthAt {
+    fn eval(&self, state: &S) -> String {
        let h = state.sample(self.0).h();
        format!("H{}: {:.2e}", loc(self.0), h)
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let h = state.sample(self.0).h();
        [
            (format!("H{}", loc(self.0)), h.to_string()),
@@ -461,13 +538,12 @@ impl AbstractMeasurement for MagneticStrengthAt {
 #[derive(Clone, Serialize, Deserialize)]
 pub struct ElectricField(pub Meters);

-#[typetag::serde]
-impl AbstractMeasurement for ElectricField {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for ElectricField {
+    fn eval(&self, state: &S) -> String {
        let e = state.sample(self.0).e();
-        format!("E{}: {:.2e}", loc(self.0), e)
+        format!("E{}: {}", loc(self.0), e)
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let e = state.sample(self.0).e();
        [
            (format!("E{}", loc(self.0)), e.to_string()),
@@ -491,7 +567,7 @@ impl Energy {
            region: Box::new(region),
        }
    }
-    fn data(&self, state: &dyn SampleableSim) -> f32 {
+    fn data<S: AbstractSim>(&self, state: &S) -> f32 {
        // Potential energy stored in a E/M field:
        //   https://en.wikipedia.org/wiki/Magnetic_energy
        //   https://en.wikipedia.org/wiki/Electric_potential_energy#Energy_stored_in_an_electrostatic_field_distribution
@@ -508,13 +584,12 @@ impl Energy {
    }
 }

-#[typetag::serde]
-impl AbstractMeasurement for Energy {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for Energy {
+    fn eval(&self, state: &S) -> String {
        let e = self.data(state);
-        format!("U({}): {:.2e}", self.name, e)
+        format!("U({}): {}", self.name, SiScale::format_short(e, "J"))
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let e = self.data(state);
        [
            (format!("U({})", self.name), e.to_string()),
@@ -538,7 +613,7 @@ impl Power {
            region: Box::new(region),
        }
    }
-    fn data(&self, state: &dyn SampleableSim) -> f32 {
+    fn data<S: AbstractSim>(&self, state: &S) -> f32 {
        // Power is P = IV = A*J*V = L^2*J.(LE) = L^3 J.E
        // where L is feature size.
        #[allow(non_snake_case)]
@@ -550,13 +625,12 @@ impl Power {
    }
 }

-#[typetag::serde]
-impl AbstractMeasurement for Power {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
+impl<S: AbstractSim> AbstractMeasurement<S> for Power {
+    fn eval(&self, state: &S) -> String {
        let power = self.data(state);
-        format!("P({}): {:.2e}", self.name, power)
+        format!("P({}): {}", self.name, SiScale::format_short(power, "W"))
    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
+    fn key_value(&self, state: &S) -> IndexMap<String, String> {
        let power = self.data(state);
        [
            (format!("P({})", self.name), power.to_string()),
--- a/crates/coremem/src/render.rs
+++ b/crates/coremem/src/render.rs
@@ -1,7 +1,7 @@
-use crate::geom::Meters;
+use crate::geom::Index;
 use crate::real::ToFloat as _;
-use crate::types::vec::{Vec2, Vec3};
-use crate::sim::{SampleableSim, Sample, StaticSim};
+use crate::cross::vec::{Vec2, Vec3};
+use crate::sim::{AbstractSim, GenericSim, Sample};
 use crate::meas::{self, AbstractMeasurement};
 use crossterm::{cursor, QueueableCommand as _};
 use crossterm::style::{style, Color, PrintStyledContent, Stylize as _};
@@ -12,6 +12,8 @@ use image::{RgbImage, Rgb};
 use imageproc::{pixelops, drawing};
 use rayon::prelude::*;
 use serde::{Serialize, Deserialize};
+use std::collections::hash_map::DefaultHasher;
+use std::hash::Hasher;
 use std::fs::{File, OpenOptions};
 use std::io::{BufReader, BufWriter, Seek as _, SeekFrom, Write as _};
 use std::path::{Path, PathBuf};
@@ -52,10 +54,10 @@ fn scale_unsigned_to_u8(x: f32, typ: f32) -> u8 {
 /// Scale a vector to have magnitude between [0, 1).
 fn scale_vector(x: Vec2<f32>, typical_mag: f32) -> Vec2<f32> {
    let new_mag = scale_unsigned(x.mag(), typical_mag);
-    x.with_mag(new_mag)
+    x.with_mag(new_mag).unwrap_or_default()
 }

-fn im_size<S: SampleableSim>(state: &S, max_w: u32, max_h: u32) -> (u32, u32) {
+fn im_size<S: AbstractSim>(state: &S, max_w: u32, max_h: u32) -> (u32, u32) {
    let mut width = max_w;
    let mut height = width * state.height() / state.width();
    if height > max_h {
@@ -72,6 +74,7 @@ pub enum FieldDisplayMode {
    EzBxy,
    BCurrent,
    M,
+    Material,
 }

 impl FieldDisplayMode {
@@ -81,17 +84,19 @@ impl FieldDisplayMode {
            BzExy => EzBxy,
            EzBxy => BCurrent,
            BCurrent => M,
-            M => BzExy,
+            M => Material,
+            Material => BzExy,
        }
    }

    pub fn prev(self) -> Self {
        use FieldDisplayMode::*;
        match self {
-            BzExy => M,
+            BzExy => Material,
            EzBxy => BzExy,
            BCurrent => EzBxy,
            M => BCurrent,
+            Material => M,
        }
    }
 }
@@ -129,20 +134,22 @@ impl RenderConfig {
 struct RenderSteps<'a, S> {
    im: RgbImage, 
    sim: &'a S,
-    meas: &'a [Box<dyn AbstractMeasurement>],
+    meas: &'a [&'a dyn AbstractMeasurement<S>],
    /// Simulation z coordinate to sample
    z: u32,
 }

-impl<'a, S: SampleableSim> RenderSteps<'a, S> {
+impl<'a, S: AbstractSim> RenderSteps<'a, S> {
+    // TODO: this could probably be a single measurement, and we just let collections of
+    // measurements also behave as measurements
    /// Render using default configuration constants
-    fn render(state: &'a S, measurements: &'a [Box<dyn AbstractMeasurement>], z: u32) -> RgbImage {
+    fn render(state: &'a S, measurements: &'a [&'a dyn AbstractMeasurement<S>], z: u32) -> RgbImage {
        Self::render_configured(state, measurements, z, (640, 480), RenderConfig::default())
    }
    /// Render, controlling things like the size.
    fn render_configured(
        state: &'a S,
-        measurements: &'a [Box<dyn AbstractMeasurement>],
+        measurements: &'a [&'a dyn AbstractMeasurement<S>],
        z: u32,
        max_size: (u32, u32),
        config: RenderConfig,
@@ -174,11 +181,14 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
            FieldDisplayMode::M => {
                me.render_m(config.scale);
            }
+            FieldDisplayMode::Material => {
+                me.render_mat(config.scale);
+            }
        }
        me.render_measurements();
        me.im
    }
-    fn new(sim: &'a S, meas: &'a [Box<dyn AbstractMeasurement>], width: u32, height: u32, z: u32) -> Self {
+    fn new(sim: &'a S, meas: &'a [&'a dyn AbstractMeasurement<S>], width: u32, height: u32, z: u32) -> Self {
        RenderSteps {
            im: RgbImage::new(width, height),
            sim,
@@ -187,13 +197,10 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
        }
    }

-    fn get_at_px(&self, x_px: u32, y_px: u32) -> Sample {
-        let x_prop = x_px as f32 / self.im.width() as f32;
-        let x_m = x_prop * (self.sim.width() as f32 * self.sim.feature_size() as f32);
-        let y_prop = y_px as f32 / self.im.height() as f32;
-        let y_m = y_prop * (self.sim.height() as f32 * self.sim.feature_size() as f32);
-        let z_m = self.z as f32 * self.sim.feature_size() as f32;
-        self.sim.sample(Meters(Vec3::new(x_m, y_m, z_m)))
+    fn get_at_px<'b>(&'b self, x_px: u32, y_px: u32) -> Sample<'b, S::Real, S::Material> {
+        let x_idx = x_px * self.sim.width() / self.im.width();
+        let y_idx = y_px * self.sim.height() / self.im.height();
+        self.sim.sample(Index::new(x_idx, y_idx, self.z))
    }

    //////////////   Ex/Ey/Bz configuration ////////////
@@ -230,7 +237,22 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
        self.render_vector_field(Rgb([0xff, 0xff, 0xff]), 1.0e5 * scale, |cell| cell.m().xy().to_f32());
    }

-    fn render_vector_field<F: Fn(&Sample) -> Vec2<f32>>(&mut self, color: Rgb<u8>, typical: f32, measure: F) {
+    fn render_mat(&mut self, scale: f32) {
+        unsafe fn to_bytes<T>(d: &T) -> &[u8] {
+            std::slice::from_raw_parts(d as *const T as *const u8, std::mem::size_of::<T>())
+        }
+        self.render_scalar_field(scale, false, 1, |cell| {
+            let mut hasher = DefaultHasher::new();
+            let as_bytes = unsafe { to_bytes(cell.material()) };
+            std::hash::Hash::hash_slice(as_bytes, &mut hasher);
+            hasher.finish() as f32 / (-1i64 as u64 as f32)
+        });
+    }
+
+    fn render_vector_field<F>(&mut self, color: Rgb<u8>, typical: f32, measure: F)
+    where
+        F: Fn(&Sample<'_, S::Real, S::Material>) -> Vec2<f32>
+    {
        let w = self.im.width();
        let h = self.im.height();
        let vec_spacing = 10;
@@ -245,7 +267,10 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
            }
        }
    }
-    fn render_scalar_field<F: Fn(&Sample) -> f32 + Sync>(&mut self, typical: f32, signed: bool, slot: u32, measure: F) {
+    fn render_scalar_field<F>(&mut self, typical: f32, signed: bool, slot: u32, measure: F)
+    where
+        F: Fn(&Sample<'_, S::Real, S::Material>) -> f32 + Sync
+    {
        // XXX: get_at_px borrows self, so we need to clone the image to operate on it mutably.
        let mut im = self.im.clone();
        let w = im.width();
@@ -291,7 +316,10 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
        }
    }

-    fn field_vector<F: Fn(&Sample) -> Vec2<f32>>(&self, xidx: u32, yidx: u32, size: u32, measure: &F) -> Vec2<f32> {
+    fn field_vector<F>(&self, xidx: u32, yidx: u32, size: u32, measure: &F) -> Vec2<f32>
+    where
+        F: Fn(&Sample<'_, S::Real, S::Material>) -> Vec2<f32>
+    {
        let mut field = Vec2::default();
        let w = self.im.width();
        let h = self.im.height();
@@ -340,25 +368,25 @@ impl ImageRenderExt for RgbImage {
 }

 pub trait Renderer<S>: Send + Sync {
-    fn render_z_slice(&self, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig);
+    fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig);
    // {
    //     self.render_with_image(state, &RenderSteps::render(state, measurements, z), measurements);
    // }
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig);
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig);
    /// Not intended to be called directly by users; implement this if you want the image to be
    /// computed using default settings and you just manage where to display/save it.
-    fn render_with_image(&self, state: &S, _im: &RgbImage, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+    fn render_with_image(&self, state: &S, _im: &RgbImage, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        self.render(state, measurements, config);
    }
 }

-fn default_render_z_slice<S: SampleableSim, R: Renderer<S>>(
-    me: &R, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig,
+fn default_render_z_slice<S: AbstractSim, R: Renderer<S>>(
+    me: &R, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig,
 ) {
    me.render_with_image(state, &RenderSteps::render(state, measurements, z), measurements, config);
 }
-fn default_render<S: SampleableSim, R: Renderer<S>>(
-    me: &R, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig
+fn default_render<S: AbstractSim, R: Renderer<S>>(
+    me: &R, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig
 ) {
        me.render_z_slice(state, state.depth() / 2, measurements, config);
 }
@@ -366,7 +394,7 @@ fn default_render<S: SampleableSim, R: Renderer<S>>(
 // pub struct NumericTermRenderer;
 // 
 // impl Renderer for NumericTermRenderer {
-//     fn render(&mut self, state: &SimSnapshot, _measurements: &[Box<dyn AbstractMeasurement>]) {
+//     fn render(&mut self, state: &SimSnapshot, _measurements: &[&dyn AbstractMeasurement<S>]) {
 //         for y in 0..state.height() {
 //             for x in 0..state.width() {
 //                 let cell = state.get((x, y).into());
@@ -386,15 +414,15 @@ fn default_render<S: SampleableSim, R: Renderer<S>>(
 #[derive(Default)]
 pub struct ColorTermRenderer;

-impl<S: SampleableSim> Renderer<S> for ColorTermRenderer {
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+impl<S: AbstractSim> Renderer<S> for ColorTermRenderer {
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        default_render(self, state, measurements, config)
    }
    fn render_z_slice(
        &self,
        state: &S,
        z: u32,
-        measurements: &[Box<dyn AbstractMeasurement>],
+        measurements: &[&dyn AbstractMeasurement<S>],
        config: RenderConfig,
    ) {
        let (max_w, mut max_h) = crossterm::terminal::size().unwrap();
@@ -448,14 +476,14 @@ impl Y4MRenderer {
    }
 }

-impl<S: SampleableSim> Renderer<S> for Y4MRenderer {
-    fn render_z_slice(&self, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+impl<S: AbstractSim> Renderer<S> for Y4MRenderer {
+    fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        default_render_z_slice(self, state, z, measurements, config)
    }
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        default_render(self, state, measurements, config)
    }
-    fn render_with_image(&self, _state: &S, im: &RgbImage, _meas: &[Box<dyn AbstractMeasurement>], _config: RenderConfig) {
+    fn render_with_image(&self, _state: &S, im: &RgbImage, _meas: &[&dyn AbstractMeasurement<S>], _config: RenderConfig) {
        {
            let mut enc = self.encoder.lock().unwrap();
            if enc.is_none() {
@@ -540,17 +568,17 @@ impl<S> MultiRenderer<S> {
    }
 }

-impl<S: SampleableSim> Renderer<S> for MultiRenderer<S> {
-    fn render_z_slice(&self, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+impl<S: AbstractSim> Renderer<S> for MultiRenderer<S> {
+    fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        default_render_z_slice(self, state, z, measurements, config)
    }
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        if self.renderers.read().unwrap().len() != 0 {
            self.render_with_image(state, &RenderSteps::render(state, measurements, state.depth() / 2), measurements, config);
        }
    }

-    fn render_with_image(&self, state: &S, im: &RgbImage, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+    fn render_with_image(&self, state: &S, im: &RgbImage, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        for r in &*self.renderers.read().unwrap() {
            if r.work_this_frame(state.step_no()) {
                r.renderer.render_with_image(state, im, measurements, config);
@@ -560,25 +588,28 @@ impl<S: SampleableSim> Renderer<S> for MultiRenderer<S> {
 }

 #[derive(Serialize, Deserialize)]
-pub struct SerializedFrame<S=StaticSim> {
+pub struct SerializedFrame<S> {
    pub state: S,
    /// although not generally necessary to load the sim, saving the measurements is beneficial for
    /// post-processing.
-    pub measurements: Vec<Box<dyn AbstractMeasurement>>,
+    pub measurements: Vec<meas::Evaluated>,
 }

-impl<S: SampleableSim> SerializedFrame<S> {
-    pub fn to_static(self) -> SerializedFrame<StaticSim> {
+impl<S: AbstractSim> SerializedFrame<S> {
+    pub fn to_generic(self) -> SerializedFrame<GenericSim<S::Real>> {
        SerializedFrame {
-            state: SampleableSim::to_static(&self.state),
+            state: AbstractSim::to_generic(&self.state),
            measurements: self.measurements,
        }
    }
 }

+/// this serializes the simulation state plus measurements to disk.
+/// it can either convert the state to a generic, material-agnostic format (generic)
+/// or dump it as-is.
 pub struct SerializerRenderer {
    fmt_str: String,
-    prefer_static: bool,
+    prefer_generic: bool,
 }

 impl SerializerRenderer {
@@ -587,25 +618,25 @@ impl SerializerRenderer {
    pub fn new(fmt_str: &str) -> Self {
        Self {
            fmt_str: fmt_str.into(),
-            prefer_static: false,
+            prefer_generic: false,
        }
    }

-    /// Same as `new`, but cast to StaticSim before serializing. This yields a file that's easier
-    /// for post-processing, and may be smaller in size.
-    pub fn new_static(fmt_str: &str) -> Self {
+    /// Same as `new`, but cast to GenericSim before serializing. This yields a file that's easier
+    /// for post-processing.
+    pub fn new_generic(fmt_str: &str) -> Self {
        Self {
            fmt_str: fmt_str.into(),
-            prefer_static: true,
+            prefer_generic: true,
        }
    }
 }

 impl SerializerRenderer {
-    fn serialize<S: SampleableSim + Serialize>(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>]) {
+    fn serialize<S: AbstractSim + Serialize>(&self, state: &S, measurements: Vec<meas::Evaluated>) {
        let frame = SerializedFrame {
            state,
-            measurements: measurements.iter().cloned().collect(),
+            measurements,
        };
        let name = self.fmt_str.replace("{step_no}", &*frame.state.step_no().to_string());
        let out = BufWriter::new(File::create(name).unwrap());
@@ -613,21 +644,21 @@ impl SerializerRenderer {
        bincode::serialize_into(out, &frame).unwrap();
    }

-    pub fn try_load<S: SampleableSim + for <'a> Deserialize<'a>>(&self) -> Option<SerializedFrame<S>> {
+    pub fn try_load<S: AbstractSim + for <'a> Deserialize<'a>>(&self) -> Option<SerializedFrame<S>> {
        let mut reader = BufReader::new(File::open(&*self.fmt_str).ok()?);
        bincode::deserialize_from(&mut reader).ok()
    }
 }

-impl<S: SampleableSim + Serialize> Renderer<S> for SerializerRenderer {
-    fn render_z_slice(&self, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+impl<S: AbstractSim + Serialize> Renderer<S> for SerializerRenderer {
+    fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        default_render_z_slice(self, state, z, measurements, config)
    }
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], _config: RenderConfig) {
-        if self.prefer_static {
-            self.serialize(&state.to_static(), measurements);
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], _config: RenderConfig) {
+        if self.prefer_generic {
+            self.serialize(&state.to_generic(), meas::eval_to_vec(state, measurements));
        } else {
-            self.serialize(state, measurements);
+            self.serialize(state, meas::eval_to_vec(state, measurements));
        }
    }
 }
@@ -662,11 +693,11 @@ impl CsvRenderer {
    }
 }

-impl<S: SampleableSim> Renderer<S> for CsvRenderer {
-    fn render_z_slice(&self, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+impl<S: AbstractSim> Renderer<S> for CsvRenderer {
+    fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        default_render_z_slice(self, state, z, measurements, config)
    }
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], _config: RenderConfig) {
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], _config: RenderConfig) {
        let row = meas::eval_multiple_kv(state, measurements);
        let step = state.step_no();
        let mut lock = self.state.lock().unwrap();
--- a/crates/coremem/src/sim/legacy/mat/bh_ferromagnet.rs
+++ b/crates/coremem/src/sim/legacy/mat/bh_ferromagnet.rs
@@ -1,8 +1,9 @@
 use super::Material;
+use crate::material_compat;
 use crate::geom::{Line2d, Polygon2d};
 use crate::real::Real;
 use crate::sim::legacy::{CellState, StepParametersMut};
-use crate::types::vec::{Vec2, Vec3};
+use crate::cross::vec::{Vec2, Vec3};

 use lazy_static::lazy_static;
 use log::trace;
@@ -199,6 +200,7 @@ impl<R: Real> Material<R> for Ferroxcube3R1<R> {
        StepParametersMut::default().with_conductivity(Vec3::uniform(1e-3))
    }
 }
+material_compat!(R, Ferroxcube3R1<R>);

 /// Simple, square-loop ferrite
 #[derive(Default, Copy, Clone, PartialEq, Serialize, Deserialize)]
@@ -235,6 +237,7 @@ impl<R: Real> Material<R> for MinimalSquare<R> {
        StepParametersMut::default().with_conductivity(Vec3::uniform(1e-3))
    }
 }
+material_compat!(R, MinimalSquare<R>);


 #[cfg(test)]
--- a/crates/coremem/src/sim/legacy/mat/db.rs
+++ b/crates/coremem/src/sim/legacy/mat/db.rs
@@ -2,7 +2,7 @@

 use super::{AnisomorphicConductor, IsomorphicConductor, LinearMagnet, Ferroxcube3R1, MinimalSquare};
 use crate::real::Real;
-use crate::types::vec::Vec3;
+use crate::cross::vec::Vec3;

 pub fn conductor<R: Real, R2: Real>(conductivity: R2) -> IsomorphicConductor<R> {
    IsomorphicConductor::new(conductivity.cast())
--- a/crates/coremem/src/sim/legacy/mat/linear.rs
+++ b/crates/coremem/src/sim/legacy/mat/linear.rs
@@ -1,7 +1,8 @@
 use super::Material;
+use crate::material_compat;
 use crate::real::Real;
 use crate::sim::legacy::CellState;
-use crate::types::vec::Vec3;
+use crate::cross::vec::Vec3;

 use serde::{Serialize, Deserialize};

@@ -45,6 +46,7 @@ impl<R: Real> Material<R> for LinearMagnet<R> {
        self.m += delta_m;
    }
 }
+material_compat!(R, LinearMagnet<R>);

 #[cfg(test)]
 mod test {
--- a/crates/coremem/src/sim/legacy/mat/mod.rs
+++ b/crates/coremem/src/sim/legacy/mat/mod.rs
@@ -1,8 +1,7 @@
 use crate::real::Real;
 use crate::sim::legacy::{CellState, PmlParameters, PmlState, StepParameters, StepParametersMut};
-use crate::types::vec::Vec3;
+use crate::cross::vec::Vec3;

-use enum_dispatch::enum_dispatch;
 use serde::{Serialize, Deserialize};

 pub mod db;
@@ -10,7 +9,7 @@ mod bh_ferromagnet;
 mod linear;

 pub use bh_ferromagnet::*;
-pub use coremem_types::mat::{
+pub use coremem_cross::mat::{
    AnisomorphicConductor,
    Ferroxcube3R1MH,
    FullyGenericMaterial,
@@ -20,7 +19,6 @@ pub use coremem_types::mat::{
 };
 pub use linear::*;

-#[enum_dispatch]
 pub trait Material<R: Real> {
    fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
        // by default, behave as a vacuum
@@ -34,6 +32,22 @@ pub trait Material<R: Real> {
    fn step_b(&mut self, _context: &CellState<R>, _delta_b: Vec3<R>) {
    }
 }
+#[macro_export]
+macro_rules! material_compat {
+    (R, $mat:path) => {
+        // XXX this is not that useful an implementation.
+        // it exists mostly because some users want the `Material::conductivity()` method.
+        impl<R: Real> crate::mat::Material<R> for $mat {
+            fn conductivity(&self) -> Vec3<R> {
+                crate::sim::legacy::mat::MaterialExt::step_parameters(self).conductivity()
+            }
+            fn move_b_vec(&self, _m: Vec3<R>, _target_b: Vec3<R>) -> Vec3<R> {
+                unimplemented!()
+            }
+        }
+    }
+}
+

 pub trait MaterialExt<R> {
    fn step_parameters<'a>(&'a self) -> StepParameters<'a, R>;
@@ -84,6 +98,7 @@ impl<R: Real> Material<R> for Static<R> {
        self.m
    }
 }
+material_compat!(R, Static<R>);

 impl<R: Real, T> From<T> for Static<R>
 where T: Into<GenericMaterial<R>>
@@ -108,9 +123,9 @@ impl<R: Real> Material<R> for Pml<R> {
        StepParametersMut::default().with_pml(&mut self.0, self.1)
    }
 }
+material_compat!(R, Pml<R>);


-// #[enum_dispatch(Material)]
 #[derive(Clone, PartialEq, Serialize, Deserialize)]
 pub enum GenericMaterial<R> {
    Conductor(AnisomorphicConductor<R>),
@@ -207,8 +222,8 @@ impl<R: Real> Material<R> for GenericMaterial<R> {
        }
    }
 }
+material_compat!(R, GenericMaterial<R>);

-// #[enum_dispatch(Material)]
 #[derive(Clone, Serialize, Deserialize)]
 pub enum GenericMaterialNoPml<R> {
    Conductor(AnisomorphicConductor<R>),
@@ -264,10 +279,10 @@ impl<R: Real> Material<R> for GenericMaterialNoPml<R> {
        }
    }
 }
+material_compat!(R, GenericMaterialNoPml<R>);


 /// Materials which have only 1 Vec3.
-// #[enum_dispatch(Material)]
 #[derive(Clone, Serialize, Deserialize)]
 pub enum GenericMaterialOneField<R> {
    Conductor(AnisomorphicConductor<R>),
@@ -315,15 +330,16 @@ impl<R: Real> Material<R> for GenericMaterialOneField<R> {
        }
    }
 }
+material_compat!(R, GenericMaterialOneField<R>);

-// coremem_types adapters
+// coremem_cross adapters
 // TODO: move this to a dedicated file

-/// the coremem_types Materials are stateless;
+/// the coremem_cross Materials are stateless;
 /// rather than hold onto their own magnetic fields (for example), the simulation holds that.
 /// that's counter to the original cpu-only simulation, in which materials hold their own state.
 ///
-/// this type adapts any stateless coremem_types::mat::Material type to be a coremem::mat::Material.
+/// this type adapts any stateless coremem_cross::mat::Material type to be a coremem::mat::Material.
 #[derive(Default, Copy, Clone, PartialEq, Serialize, Deserialize)]
 pub struct AdaptStateless<R, M> {
    mat: M,
@@ -342,7 +358,7 @@ impl<R: Default, M> From<M> for AdaptStateless<R, M> {
    }
 }

-impl<R: Real, M: coremem_types::mat::Material<R>> Material<R> for AdaptStateless<R, M> {
+impl<R: Real, M: coremem_cross::mat::Material<R>> Material<R> for AdaptStateless<R, M> {
    fn m(&self) -> Vec3<R> {
        self.m
    }
@@ -359,20 +375,21 @@ impl<R: Real, M: coremem_types::mat::Material<R>> Material<R> for AdaptStateless
 // conductors: these are truly stateless
 impl<R: Real> Material<R> for AnisomorphicConductor<R> {
    fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
-        let c = coremem_types::mat::Material::conductivity(self);
+        let c = coremem_cross::mat::Material::conductivity(self);
        StepParametersMut::default().with_conductivity(c)
    }
 }
 impl<R: Real> Material<R> for IsomorphicConductor<R> {
    fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
-        let c = coremem_types::mat::Material::conductivity(self);
+        let c = coremem_cross::mat::Material::conductivity(self);
        StepParametersMut::default().with_conductivity(c)
    }
 }

-pub type MBPgram<R> = AdaptStateless<R, coremem_types::mat::MBPgram<R>>;
+pub type MBPgram<R> = AdaptStateless<R, coremem_cross::mat::MBPgram<R>>;
 impl<R: Real> MBPgram<R> {
    pub fn new(b_start: R, b_end: R, m_max: R) -> Self {
-        coremem_types::mat::MBPgram::new(b_start, b_end, m_max).into()
+        coremem_cross::mat::MBPgram::new(b_start, b_end, m_max).into()
    }
 }
+material_compat!(R, MBPgram<R>);
--- a/crates/coremem/src/sim/legacy/mod.rs
+++ b/crates/coremem/src/sim/legacy/mod.rs
@@ -1,19 +1,19 @@
 pub mod mat;

-use crate::geom::{Coord, Index, Meters};
-use crate::types::real::{R32, Real};
-use crate::types::vec::{Vec3, Vec3u};
-use crate::sim::{CellStateWithM, GenericSim, MaterialSim, Sample, SampleableSim};
+use crate::geom::{Coord, Index};
+use crate::cross;
+use crate::cross::real::{R32, Real};
+use crate::cross::step::SimMeta;
+use crate::cross::vec::{Vec3, Vec3u};
+use crate::sim::{AbstractSim, Fields, GenericSim, StaticSim};
 use crate::stim::AbstractStimulus;

-use mat::{GenericMaterial, Material, MaterialExt as _};
+use mat::{GenericMaterial, Material};

 use log::trace;
 use ndarray::{Array3, Zip};
 use serde::{Serialize, Deserialize};

-pub type StaticSim = SimState<R32, mat::Static<R32>>;
-
 #[derive(Default, Copy, Clone, PartialEq, Serialize, Deserialize)]
 pub struct PmlState<R> {
    /// Auxiliary fields used when stepping E
@@ -264,7 +264,7 @@ impl<R: Real, M: Default> SimState<R, M> {
    }
 }

-impl<R: Real, M: Material<R> + Send + Sync> SimState<R, M> {
+impl<R: Real, M: Material<R> + cross::mat::Material<R> + Send + Sync> SimState<R, M> {
    pub fn step(&mut self) {
        let time_step = self.timestep;
        let half_time_step = R::half() * time_step;
@@ -305,9 +305,9 @@ impl<R: Real, M: Material<R> + Send + Sync> SimState<R, M> {
        Zip::from(ndarray::indices_of(&self.e)).and(&mut self.e).and(&mut self.h).par_for_each(
            |(z, y, x), e, h| {
                let pos_meters = Index((x as u32, y as u32, z as u32).into()).to_meters(feature_size);
-                let (density_e, density_h) = stim.at(t_sec, pos_meters);
-                let value_e = density_e.cast::<R>() * timestep.cast::<R>();
-                let value_h = density_h.cast::<R>() * timestep.cast::<R>();
+                let densities = stim.at(t_sec, pos_meters);
+                let value_e = densities.e.cast::<R>() * timestep.cast::<R>();
+                let value_h = densities.h.cast::<R>() * timestep.cast::<R>();
                *e += value_e;
                *h += value_h;
            });
@@ -315,56 +315,34 @@ impl<R: Real, M: Material<R> + Send + Sync> SimState<R, M> {
    }
 }

-impl<R: Real, M: Material<R> + Clone + Send + Sync + PartialEq> MaterialSim for SimState<R, M> {
+impl<R: Real, M: Material<R> + cross::mat::Material<R> + Send + Sync> AbstractSim for SimState<R, M> {
+    type Real = R;
    type Material = M;
-
-    fn put_material<C: Coord, M2: Into<Self::Material>>(&mut self, pos: C, mat: M2) {
-        *self.get_mat_mut(pos) = mat.into();
-    }
-
-    fn get_material<C: Coord>(&self, pos: C) -> M {
-        self.get_material(pos).clone()
-    }
-}
-
-impl<R: Real, M: Material<R> + Send + Sync> SampleableSim for SimState<R, M> {
-    fn sample(&self, pos: Meters) -> Sample {
-        // TODO: smarter sampling than nearest neighbor?
-        let pos_sim = pos.to_index(self.feature_size());
-        let idx = [pos_sim.z() as usize, pos_sim.y() as _, pos_sim.x() as _];
-        match self.cells.get(idx) {
-            Some(mat) => Sample {
-                state: CellStateWithM {
-                    e: self.e[idx].cast(),
-                    h: self.h[idx].cast(),
-                    m: mat.m().cast(),
-                },
-                conductivity: mat.conductivity().cast(),
-            },
-            None => Default::default(),
-        }
-    }
-
-    fn size(&self) -> Index {
-        Index(Vec3u::new(
+    fn meta(&self) -> SimMeta<f32> {
+        let dim = Vec3u::new(
            self.cells.shape()[2] as _,
            self.cells.shape()[1] as _,
            self.cells.shape()[0] as _,
-        ))
-    }
-    fn feature_size(&self) -> f32 {
-        self.feature_size.to_f32()
-    }
-    fn timestep(&self) -> f32 {
-        self.timestep.to_f32()
+        );
+        let feature_size = self.feature_size.to_f32();
+        let inv_feature_size = 1.0/feature_size;
+        let time_step = self.timestep.to_f32();
+        SimMeta {
+            dim, feature_size, inv_feature_size, time_step
+        }
    }
    fn step_no(&self) -> u64 {
        self.step_no
    }
-}

+    fn put_material_index(&mut self, pos: Index, mat: M) {
+        *self.get_mat_mut(pos) = mat;
+    }
+
+    fn get_material_index(&self, pos: Index) -> &M {
+        self.get_material(pos)
+    }

-impl<R: Real, M: Material<R> + Send + Sync> GenericSim for SimState<R, M> {
    fn step_multiple<S: AbstractStimulus>(&mut self, num_steps: u32, s: &S) {
        for _ in 0..num_steps {
            self.apply_stimulus(s);
@@ -372,16 +350,21 @@ impl<R: Real, M: Material<R> + Send + Sync> GenericSim for SimState<R, M> {
        }
    }

-    fn impulse_e_meters(&mut self, pos: Meters, amount: Vec3<f32>) {
-        *self.get_e_mut(pos) += amount.cast();
+    fn fields_at_index(&self, pos: Index) -> Fields<R> {
+        let idx = [pos.z() as usize, pos.y() as _, pos.x() as _];
+        match self.cells.get(idx) {
+            Some(mat) => Fields::new(
+                self.e[idx], self.h[idx], mat.m()
+            ).cast(),
+            None => Default::default(),
+        }
    }

-    fn impulse_h_meters(&mut self, pos: Meters, amount: Vec3<f32>) {
-        self.impulse_b_meters(pos, amount * f32::mu0());
+    fn to_static(&self) -> StaticSim {
+        unimplemented!()
    }
-
-    fn impulse_b_meters(&mut self, pos: Meters, amount: Vec3<f32>) {
-        self.get_hcell(pos).impulse_b(amount.cast());
+    fn to_generic(&self) -> GenericSim<Self::Real> {
+        unimplemented!()
    }
 }

@@ -450,61 +433,6 @@ impl<'a, R> Neighbors<'a, R> {
    }
 }

-impl Sample {
-    pub fn e(&self) -> Vec3<f32> {
-        self.state.e()
-    }
-    pub fn ex(&self) -> f32 {
-        self.e().x()
-    }
-    pub fn ey(&self) -> f32 {
-        self.e().y()
-    }
-    pub fn ez(&self) -> f32 {
-        self.e().z()
-    }
-
-    pub fn h(&self) -> Vec3<f32> {
-        self.state.h()
-    }
-    pub fn hx(&self) -> f32 {
-        self.h().x()
-    }
-    pub fn hy(&self) -> f32 {
-        self.h().y()
-    }
-    pub fn hz(&self) -> f32 {
-        self.h().z()
-    }
-
-    pub fn b(&self) -> Vec3<f32> {
-        self.state.b()
-    }
-    pub fn bx(&self) -> f32 {
-        self.b().x()
-    }
-    pub fn by(&self) -> f32 {
-        self.b().y()
-    }
-    pub fn bz(&self) -> f32 {
-        self.b().z()
-    }
-
-    pub fn m(&self) -> Vec3<f32> {
-        self.state.m()
-    }
-    pub fn conductivity(&self) -> Vec3<f32> {
-        self.conductivity
-    }
-
-    pub fn current_density(&self) -> Vec3<f32> {
-        // TODO: does this make sense for Pml?
-        let conductivity = self.conductivity();
-        self.e().elem_mul(conductivity)
-    }
-
-}
-
 /// Solve df/dt in:
 /// nabla_g = df_dt_coeff * df/dt + f_coeff * f + f_int_coeff * \int f + f_int_int_coeff * \int \int f
 #[allow(unused)]
@@ -581,18 +509,6 @@ impl<'a, R: Real, M: Material<R>> HCell<'a, R, M> {
        (*self.h + self.mat.m()) * R::mu0()
    }

-    fn impulse_b(&mut self, delta_b: Vec3<R>) {
-        let b_prev = self.b();
-        let state = CellState {
-            e: *self.e,
-            h: *self.h,
-        };
-        self.mat.step_b(&state, delta_b);
-        // mu0 (H + M) = B
-        // therefore, H = B mu0^-1 - M
-        *self.h = (b_prev + delta_b) * R::mu0_inv() - self.mat.m();
-    }
-
    fn step(&mut self, neighbors: Neighbors<'_, R>, delta_t: R, inv_feature_size: R) {
        // ```tex
        // Maxwell-Faraday equation: $\nabla x E = -dB/dt$   (1)
@@ -865,13 +781,8 @@ impl<R: Real> CellState<R> {
    pub fn h(&self) -> Vec3<R> {
        self.h
    }
-
-    pub fn with_m(&self, m: Vec3<R>) -> CellStateWithM<R> {
-        CellStateWithM {
-            e: self.e,
-            h: self.h,
-            m,
-        }
+    pub fn with_m(&self, m: Vec3<R>) -> Fields<R> {
+        Fields::new(self.e, self.h, m)
    }
 }

@@ -881,8 +792,9 @@ impl<R: Real> CellState<R> {
 #[cfg(test)]
 mod test {
    use super::*;
-    use crate::geom::{Cube, Region, WorldRegion};
+    use crate::geom::{Cube, Meters, Region, WorldRegion};
    use crate::sim::legacy::mat::{AnisomorphicConductor, IsomorphicConductor, Pml, Static};
+    use crate::stim::Fields;
    use crate::real::{R64, ToFloat as _};
    use float_eq::assert_float_eq;
    use more_asserts::*;
@@ -1225,14 +1137,14 @@ mod test {
        assert_vec3_eq(actual_df_dt, df_dt(tm));
    }

-    fn energy(s: &dyn SampleableSim) -> f32 {
+    fn energy<S: AbstractSim>(s: &S) -> f32 {
        0.5 * s.feature_volume() * s.map_sum_enumerated(|_pos: Index, cell| {
            // println!("{}: {}", _pos, cell.e());
            cell.e().mag_sq().to_f64()
        }).to_f32()
    }

-    fn energy_now_and_then<S: GenericSim>(state: &mut S, frames: u32) -> (f32, f32) {
+    fn energy_now_and_then<S: AbstractSim>(state: &mut S, frames: u32) -> (f32, f32) {
        let energy_0 = energy(state);
        for _f in 0..frames {
            // println!("e({}) = {}", _f, energy(state));
@@ -1255,6 +1167,22 @@ mod test {
        assert_eq!(state.time(), state.timestep());
    }

+    struct ConstStim {
+        loc: Index,
+        feature_size: f32,
+        e: Vec3<f32>,
+        h: Vec3<f32>,
+    }
+    impl AbstractStimulus for ConstStim {
+        fn at(&self, _t_sec: f32, loc: Meters) -> Fields {
+            if loc.to_index(self.feature_size) == self.loc {
+                Fields { e: self.e, h: self.h }
+            } else {
+                Default::default()
+            }
+        }
+    }
+
    #[test]
    fn energy_conservation_over_time() {
        let mut state = SimState::<R64, Static<R64>>::new(Index((2001, 1, 1).into()), 1e-6);
@@ -1263,9 +1191,12 @@ mod test {
            let sig = angle.sin();
            let gate = 0.5*(1.0 - angle.cos());
            //println!("{}", g.exp());
-            state.impulse_ez(Index((1000, 0, 0).into()), gate*sig);
-            //dyn_state.impulse_by(index((x, 0, 0).into()), 1.5e-9*g.exp());
-            state.step();
+            state.step_multiple(1, &ConstStim {
+                loc: Index::new(0, 0, 1000),
+                feature_size: 1e-6,
+                e: Vec3::new_z(gate*sig/state.timestep()),
+                h: Vec3::zero(),
+            });
        }
        let (energy_0, energy_1) = energy_now_and_then(&mut state, 800);
        // This has some sensitivity to courant number. But surprisingly, not much to f32 v.s. f64
@@ -1279,8 +1210,12 @@ mod test {
            let angle = (t as f32)/10.0*f32::two_pi();
            let sig = angle.sin();
            let gate = 1e9 * 0.5*(1.0 - angle.cos());
-            state.impulse_ez(Index((10, 10, 10).into()), gate*sig);
-            state.step();
+            state.step_multiple(1, &ConstStim {
+                loc: Index::new(10, 10, 10),
+                feature_size: 1e-6,
+                e: Vec3::new_z(gate*sig/state.timestep()),
+                h: Vec3::zero(),
+            });
        }
        let (energy_0, energy_1) = energy_now_and_then(&mut state, 500);
        // Default boundary conditions reflect waves.
@@ -1298,8 +1233,12 @@ mod test {
            let angle = 2.0*progress*f32::two_pi();
            let sig = angle.sin();
            let gate = (progress*f32::pi()).sin();
-            state.impulse_ez(Index((100, 0, 0).into()), gate*sig);
-            state.step();
+            state.step_multiple(1, &ConstStim {
+                loc: Index::new(100, 0, 0),
+                feature_size: 1e-6,
+                e: Vec3::new_z(gate*sig/state.timestep()),
+                h: Vec3::zero(),
+            });
        }
        energy_now_and_then(&mut state, 1000)
    }
@@ -1376,19 +1315,22 @@ mod test {
        sqrt_vol: f32,
    }
    impl<F: Fn(Index) -> (Vec3<f32>, f32) + Sync> AbstractStimulus for PmlStim<F> {
-        fn at(&self, t_sec: f32, pos: Meters) -> (Vec3<f32>, Vec3<f32>) {
+        fn at(&self, t_sec: f32, pos: Meters) -> Fields {
            let angle = t_sec/self.t_end * f32::two_pi();
            let gate = 0.5*(1.0 - angle.cos());
            let (e, hz) = (self.f)(pos.to_index(self.feat_size));
            let sig_angle = angle*hz;
            let sig = sig_angle.sin();
            // TODO: test H component?
-            (e*(gate*sig / (self.t_end * self.sqrt_vol)), Vec3::zero())
+            Fields {
+                e: e*(gate*sig / (self.t_end * self.sqrt_vol)),
+                h: Vec3::zero()
+            }
        }
    }

    /// Returns the energy ratio (Eend / Estart)
-    fn pml_test_full_interior<F, S: GenericSim>(state: &mut S, f: F) -> f32
+    fn pml_test_full_interior<F, S: AbstractSim>(state: &mut S, f: F) -> f32
    where F: Fn(Index) -> (Vec3<f32>, f32) + Sync // returns (E vector, cycles (like Hz))
    {
        let stim = PmlStim {
@@ -1412,7 +1354,7 @@ mod test {
    where
        R: Region,
        F: Fn(Index) -> (Vec3<f32>, f32) + Sync,
-        S: GenericSim
+        S: AbstractSim,
    {
        let feat = state.feature_size();
        pml_test_full_interior(state, |idx| {
@@ -1424,7 +1366,7 @@ mod test {
        })
    }

-    fn pml_test_at<S: GenericSim>(state: &mut S, e: Vec3<f32>, center: Index) -> f32 {
+    fn pml_test_at<S: AbstractSim>(state: &mut S, e: Vec3<f32>, center: Index) -> f32 {
        pml_test_full_interior(state, |idx| {
            if idx == center {
                (e, 1.0)
@@ -1434,14 +1376,14 @@ mod test {
        })
    }

-    fn pml_test<S: GenericSim, R: RangeBounds<f32> + Debug>(state: &mut S, e: Vec3<f32>, en_range: R) {
+    fn pml_test<S: AbstractSim, R: RangeBounds<f32> + Debug>(state: &mut S, e: Vec3<f32>, en_range: R) {
        let center = Index(state.size().0 / 2);
        let pml = pml_test_at(state, e, center);
        // PML should absorb all energy
        assert!(en_range.contains(&pml), "{} not in {:?}", pml, en_range);
    }

-    fn pml_test_against_baseline<P: GenericSim, B: GenericSim>(pml_state: &mut P, baseline_state: &mut B, e: Vec3<f32>) {
+    fn pml_test_against_baseline<P: AbstractSim, B: AbstractSim>(pml_state: &mut P, baseline_state: &mut B, e: Vec3<f32>) {
        assert_eq!(pml_state.size(), baseline_state.size());
        let center = Index(pml_state.size().0 / 2);
        let en_pml = pml_test_at(pml_state, e, center);
@@ -1646,7 +1588,7 @@ mod test {

    // XXX the energies here are even worse than above.
    // just don't use our PML implementation: it's not great.
-    // fn pml_test_uniform_over_region<R: Region, S: GenericSim>(state: &mut S, reg: R, e: Vec3<f32>, hz: f32) -> f32 {
+    // fn pml_test_uniform_over_region<R: Region, S: AbstractSim>(state: &mut S, reg: R, e: Vec3<f32>, hz: f32) -> f32 {
    //     pml_test_over_region(state, reg, |_idx| {
    //         (e, hz)
    //     })
--- a/crates/coremem/src/sim/mod.rs
+++ b/crates/coremem/src/sim/mod.rs
@@ -1,8 +1,9 @@
-use crate::geom::{Coord, Cube, Index, InvertedRegion, Meters, Region};
-use crate::types::real::Real;
-use crate::types::vec::{Vec3, Vec3u};
+use crate::geom::{Coord, Cube, Index, InvertedRegion, Region};
+use crate::cross::mat::{FullyGenericMaterial, Material, Vacuum};
+use crate::cross::real::Real;
+use crate::cross::step::SimMeta;
+use crate::cross::vec::{Vec3, Vec3u};
 use crate::stim::{AbstractStimulus, NoopStimulus};
-use ndarray::Zip;
 use rayon::prelude::*;
 use serde::{Serialize, Deserialize};
 use std::iter::Sum;
@@ -11,175 +12,11 @@ pub mod legacy;
 pub mod spirv;
 pub mod units;

-pub use legacy::StaticSim;
+use spirv::{CpuBackend, SpirvSim};

+pub type StaticSim = SpirvSim<f32, Vacuum, CpuBackend>;
+pub type GenericSim<R> = SpirvSim<R, FullyGenericMaterial<R>, CpuBackend>;

-pub trait MaterialSim: GenericSim {
-    type Material: PartialEq;
-
-    fn put_material<C: Coord, M: Into<Self::Material>>(&mut self, pos: C, mat: M);
-    // XXX: would ideally return by-ref, but some backends need to return a handle instead
-    fn get_material<C: Coord>(&self, pos: C) -> Self::Material;
-
-    fn fill_region_using<C, Reg, F, M>(&mut self, region: &Reg, f: F)
-    where
-        Reg: Region,
-        F: Fn(C) -> M,
-        C: Coord,
-        M: Into<Self::Material>
-    {
-        for z in 0..self.depth() {
-            for y in 0..self.height() {
-                for x in 0..self.width() {
-                    let loc = Index((x, y, z).into());
-                    let meters = loc.to_meters(self.feature_size());
-                    if region.contains(meters) {
-                        self.put_material(loc, f(C::from_either(loc, meters)));
-                    }
-                }
-            }
-        }
-    }
-
-    fn fill_region<Reg: Region, M: Into<Self::Material> + Clone>(&mut self, region: &Reg, mat: M) {
-        self.fill_region_using(region, |_idx: Index| mat.clone());
-    }
-
-    fn examine_region<C, Reg, F>(&self, region: &Reg, mut f: F)
-    where
-        Reg: Region,
-        F: FnMut(C, &Self::Material),
-        C: Coord
-    {
-        for z in 0..self.depth() {
-            for y in 0..self.height() {
-                for x in 0..self.width() {
-                    let loc = Index((x, y, z).into());
-                    let meters = loc.to_meters(self.feature_size());
-                    if region.contains(meters) {
-                        f(C::from_either(loc, meters), &self.get_material(loc));
-                    }
-                }
-            }
-        }
-    }
-
-    /// Return true if the given region is filled exclusively with the provided material.
-    fn test_region_filled<Reg: Region, M: Into<Self::Material> + Clone>(&self, region: &Reg, mat: M) -> bool {
-        let mut all = true;
-        self.examine_region(region, |_idx: Index, m: &Self::Material| {
-            all = all && m == &mat.clone().into();
-        });
-        all
-    }
-
-    /// Fill the boundary, where `thickness` describes how far the boundary extends in each
-    /// direction, and `f` takes a vec where each coordinate represents how far into the boundary
-    /// the location being queried is, in each direction.
-    /// e.g. `f((1.0, 0.0, 0.2))` means the location being queried is at either extreme end on the
-    /// x axis, is not inside the y axis boundary, and is 20% of the way from the onset of the z
-    /// boundary to the edge of the z world.
-    fn fill_boundary_using<C, F, M>(&mut self, thickness: C, f: F)
-    where
-        C: Coord,
-        F: Fn(Vec3<f32>) -> M,
-        M: Into<Self::Material>,
-    {
-        // TODO: maybe this function belongs on the Driver?
-        let feat = self.feature_size();
-        let upper_left = thickness.to_index(feat);
-        let size = self.size();
-        let lower_right = size - upper_left - Index::new(1, 1, 1);
-        let region = InvertedRegion::new(Cube::new(upper_left.to_meters(feat), lower_right.to_meters(feat)));
-        self.fill_region_using(&region, |loc: Index| {
-            let depth_x = if loc.x() < upper_left.x() {
-                (upper_left.x() - loc.x()) as f32 / upper_left.x() as f32
-            } else if loc.x() > lower_right.x() {
-                (loc.x() - lower_right.x()) as f32 / upper_left.x() as f32
-            } else {
-                0.0
-            };
-            let depth_y = if loc.y() < upper_left.y() {
-                (upper_left.y() - loc.y()) as f32 / upper_left.y() as f32
-            } else if loc.y() > lower_right.y() {
-                (loc.y() - lower_right.y()) as f32 / upper_left.y() as f32
-            } else {
-                0.0
-            };
-            let depth_z = if loc.z() < upper_left.z() {
-                (upper_left.z() - loc.z()) as f32 / upper_left.z() as f32
-            } else if loc.z() > lower_right.z() {
-                (loc.z() - lower_right.z()) as f32 / upper_left.z() as f32
-            } else {
-                0.0
-            };
-            // println!("{} {}", loc, Vec3::new(depth_x, depth_y, depth_z));
-            f(Vec3::new(depth_x, depth_y, depth_z))
-        });
-    }
-}
-
-// XXX the Send/Sync bounds here could be removed with some refactoring
-pub trait GenericSim: SampleableSim {
-    fn step(&mut self) {
-        // XXX: try not to exercise this path! NoopStimulus is probably a lot of waste.
-        self.step_multiple(1, &NoopStimulus);
-    }
-    fn step_multiple<S: AbstractStimulus>(&mut self, num_steps: u32, s: &S);
-
-    /// DEPRECATED. Use stimulus instead
-    fn impulse_e_meters(&mut self, pos: Meters, amount: Vec3<f32>);
-    /// DEPRECATED. Use stimulus instead
-    fn impulse_h_meters(&mut self, pos: Meters, amount: Vec3<f32>) {
-        self.impulse_b_meters(pos, amount * f32::mu0());
-    }
-    /// DEPRECATED. Use stimulus instead
-    fn impulse_b_meters(&mut self, pos: Meters, amount: Vec3<f32>) {
-        self.impulse_h_meters(pos, amount * f32::mu0_inv());
-    }
-
-    fn impulse_e<C: Coord>(&mut self, pos: C, amt: Vec3<f32>) {
-        self.impulse_e_meters(pos.to_meters(self.feature_size()), amt)
-    }
-
-    fn impulse_h<C: Coord>(&mut self, pos: C, amt: Vec3<f32>) {
-        self.impulse_h_meters(pos.to_meters(self.feature_size()), amt)
-    }
-
-    fn impulse_b<C: Coord>(&mut self, pos: C, amt: Vec3<f32>) {
-        self.impulse_b_meters(pos.to_meters(self.feature_size()), amt)
-    }
-
-    fn impulse_ex<C: Coord>(&mut self, c: C, ex: f32) {
-        self.impulse_e(c, Vec3::new_x(ex));
-    }
-    fn impulse_ey<C: Coord>(&mut self, c: C, ey: f32) {
-        self.impulse_e(c, Vec3::new_y(ey));
-    }
-    fn impulse_ez<C: Coord>(&mut self, c: C, ez: f32) {
-        self.impulse_e(c, Vec3::new_z(ez));
-    }
-
-    fn impulse_hx<C: Coord>(&mut self, c: C, hx: f32) {
-        self.impulse_h(c, Vec3::new_x(hx));
-    }
-    fn impulse_hy<C: Coord>(&mut self, c: C, hy: f32) {
-        self.impulse_h(c, Vec3::new_y(hy));
-    }
-    fn impulse_hz<C: Coord>(&mut self, c: C, hz: f32) {
-        self.impulse_h(c, Vec3::new_z(hz));
-    }
-
-    fn impulse_bx<C: Coord>(&mut self, c: C, bx: f32) {
-        self.impulse_b(c, Vec3::new_x(bx));
-    }
-    fn impulse_by<C: Coord>(&mut self, c: C, by: f32) {
-        self.impulse_b(c, Vec3::new_y(by));
-    }
-    fn impulse_bz<C: Coord>(&mut self, c: C, bz: f32) {
-        self.impulse_b(c, Vec3::new_z(bz));
-    }
-}

 /// Conceptually, one cell looks like this (in 2d):
 ///
@@ -243,20 +80,99 @@ pub trait GenericSim: SampleableSim {
 ///          \|            |            |
 ///           +------------+------------+
 ///
-#[derive(Clone, Default, Serialize, Deserialize)]
-pub struct Sample {
-    state: CellStateWithM<f32>,
-    conductivity: Vec3<f32>,
+pub struct Sample<'a, R, M> {
+    fields: Fields<R>,
+    material: &'a M,
+}
+
+impl<'a, R: Real, M> Sample<'a, R, M> {
+    pub fn fields(&self) -> Fields<R> {
+        self.fields
+    }
+    pub fn material(&self) -> &'a M {
+        self.material
+    }
+
+    pub fn e(&self) -> Vec3<R> {
+        self.fields.e()
+    }
+    pub fn ex(&self) -> R {
+        self.e().x()
+    }
+    pub fn ey(&self) -> R {
+        self.e().y()
+    }
+    pub fn ez(&self) -> R {
+        self.e().z()
+    }
+
+    pub fn h(&self) -> Vec3<R> {
+        self.fields.h()
+    }
+    pub fn hx(&self) -> R {
+        self.h().x()
+    }
+    pub fn hy(&self) -> R {
+        self.h().y()
+    }
+    pub fn hz(&self) -> R {
+        self.h().z()
+    }
+
+    pub fn b(&self) -> Vec3<R> {
+        self.fields.b()
+    }
+    pub fn bx(&self) -> R {
+        self.b().x()
+    }
+    pub fn by(&self) -> R {
+        self.b().y()
+    }
+    pub fn bz(&self) -> R {
+        self.b().z()
+    }
+
+    pub fn m(&self) -> Vec3<R> {
+        self.fields.m()
+    }
+}
+
+impl<'a, R: Real, M: Material<R>> Sample<'a, R, M> {
+    pub fn conductivity(&self) -> Vec3<R> {
+        self.material.conductivity()
+    }
+
+    pub fn current_density(&self) -> Vec3<R> {
+        // TODO: justify/derive this.
+        // i guess the actual current density is the gradient of E multiplied by conductivity,
+        // and that when summed over a loop the negative terms from the neighbors are canceled
+        // when we grab their current density?
+        //
+        // or maybe it's $V = \int{E . dl} = IR$, so $I = \int{\sigma E . dl}$
+        // in which case this might not be "current density", but something related.
+        let conductivity = self.conductivity();
+        self.e().elem_mul(conductivity)
+    }
 }

 #[derive(Copy, Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
-pub struct CellStateWithM<R> {
+pub struct Fields<R> {
    e: Vec3<R>,
    h: Vec3<R>,
    m: Vec3<R>,
 }

-impl<R: Real> CellStateWithM<R> {
+impl<R: Real> Fields<R> {
+    pub fn new(e: Vec3<R>, h: Vec3<R>, m: Vec3<R>) -> Self {
+        Self { e, h, m }
+    }
+    pub fn cast<R2: Real>(&self) -> Fields<R2> {
+        Fields {
+            e: self.e.cast(),
+            h: self.h.cast(),
+            m: self.m.cast(),
+        }
+    }
    pub fn e(&self) -> Vec3<R> {
        self.e
    }
@@ -269,85 +185,57 @@ impl<R: Real> CellStateWithM<R> {
    pub fn b(&self) -> Vec3<R> {
        (self.h() + self.m()) * R::mu0()
    }
-}
-
-impl<'a> dyn SampleableSim + 'a {
-    pub fn get<C: Coord>(&self, at: C) -> Sample {
-        self.sample(at.to_meters(self.feature_size()))
-    }
-
-    /// Apply `F` to each Cell, and sum the results.
-    pub fn map_sum<F, Ret>(&self, f: F) -> Ret
-    where
-          F: Fn(&Sample) -> Ret + Sync,
-          Ret: Sum<Ret> + Send,
-    {
-        self.map_sum_enumerated(|_at: Index, cell| f(cell))
-    }
-
-    pub fn map_sum_enumerated<C, F, Ret>(&self, f: F) -> Ret
-    where C: Coord,
-          F: Fn(C, &Sample) -> Ret + Sync,
-          Ret: Sum<Ret> + Send,
-    {
-        let (w, h, d) = (self.width(), self.height(), self.depth());
-        (0..d).into_par_iter().map(
-            |z| (0..h).into_par_iter().map_with(z,
-            |&mut z, y| (0..w).into_par_iter().map_with((z, y),
-            |&mut (z, y), x|
-        {
-            let at = Index(Vec3u::new(x, y, z));
-            f(C::from_index(at, self.feature_size()), &self.get(at))
-        }))).flatten().flatten().sum()
-    }
-
-    pub fn volume_of_region<R: Region + ?Sized>(&self, region: &R) -> u32 {
-        self.map_sum_over(region, |_| 1)
-    }
-
-    /// Apply `F` to each Cell, and sum the results.
-    pub fn map_sum_over<F, Ret, Reg>(&self, region: &Reg, f: F) -> Ret
-    where
-          F: Fn(&Sample) -> Ret + Sync,
-          Ret: Sum<Ret> + Default + Send,
-          Reg: Region + ?Sized
-    {
-        self.map_sum_over_enumerated(region, |_at: Index, cell| f(cell))
-    }
-
-    pub fn map_sum_over_enumerated<C, F, Ret, Reg>(&self, region: &Reg, f: F) -> Ret
-    where C: Coord,
-          F: Fn(C, &Sample) -> Ret + Sync,
-          Ret: Sum<Ret> + Default + Send,
-          Reg: Region + ?Sized,
-    {
-        let (w, h, d) = (self.width(), self.height(), self.depth());
-        (0..d).into_par_iter().map(
-            |z| (0..h).into_par_iter().map_with(z,
-            |&mut z, y| (0..w).into_par_iter().map_with((z, y),
-            |&mut (z, y), x|
-        {
-            let at = Index(Vec3u::new(x, y, z));
-            let meters = at.to_meters(self.feature_size());
-            if region.contains(meters) {
-                f(C::from_index(at, self.feature_size()), &self.get(at))
-            } else {
-                Default::default()
-            }
-        }))).flatten().flatten().sum()
-    }
-
-    pub fn current<C: Coord>(&self, c: C) -> Vec3<f32> {
-        self.get(c).current_density() * self.feature_size() * self.feature_size()
+    pub fn with_material<'a, M>(self, material: &'a M) -> Sample<'a, R, M> {
+        Sample {
+            fields: self,
+            material,
+        }
    }
 }

-// XXX the Send/Sync bounds here could be removed with some refactoring
-pub trait SampleableSim: Send + Sync {
-    fn sample(&self, pos: Meters) -> Sample;
+// TODO: the Sync bound here could be removed with some refactoring
+pub trait AbstractSim: Sync {
+    type Real: Real;
+    type Material: Material<Self::Real>;

-    fn size(&self) -> Index;
-    fn feature_size(&self) -> f32;
+    // TODO: should return SimMeta<Self::Real>?
+    fn meta(&self) -> SimMeta<f32>;
+    fn step_no(&self) -> u64;
+    fn fields_at_index(&self, pos: Index) -> Fields<Self::Real>;
+    fn get_material_index(&self, at: Index) -> &Self::Material;
+    fn put_material_index(&mut self, at: Index, m: Self::Material);
+    fn step_multiple<S: AbstractStimulus>(&mut self, num_steps: u32, s: &S);
+
+    /// Take a "snapshot" of the simulation, dropping all material-specific information.
+    fn to_static(&self) -> StaticSim;
+    fn to_generic(&self) -> GenericSim<Self::Real>;
+
+
+    //--- HELPER METHODS below (derived) ---//
+
+    fn get_material<C: Coord>(&self, pos: C) -> &Self::Material {
+        self.get_material_index(pos.to_index(self.feature_size()))
+    }
+    fn put_material<C: Coord, M: Into<Self::Material>>(&mut self, pos: C, mat: M) {
+        self.put_material_index(pos.to_index(self.feature_size()), mat.into())
+    }
+
+    fn sample<'a, C: Coord>(&'a self, pos: C) -> Sample<'a, Self::Real, Self::Material> {
+        self.fields_at_index(pos.to_index(self.feature_size()))
+            .with_material(self.get_material(pos))
+    }
+
+    fn step(&mut self) {
+        // XXX: try not to exercise this path! NoopStimulus is probably a lot of waste.
+        self.step_multiple(1, &NoopStimulus);
+    }
+
+    fn size(&self) -> Index {
+        Index(self.meta().dim)
+    }
+    fn feature_size(&self) -> f32 {
+        self.meta().feature_size
+    }
    fn feature_volume(&self) -> f32 {
        let f = self.feature_size();
        f*f*f
@@ -356,9 +244,9 @@ pub trait SampleableSim: Send + Sync {
        let s = self.size().to_meters(self.feature_size());
        s.x() * s.y() * s.z()
    }
-    fn timestep(&self) -> f32;
-
-    fn step_no(&self) -> u64;
+    fn timestep(&self) -> f32 {
+        self.meta().time_step
+    }

    fn width(&self) -> u32 {
        self.size().x()
@@ -373,22 +261,171 @@ pub trait SampleableSim: Send + Sync {
        self.timestep() * self.step_no() as f32
    }

-    /// Take a "snapshot" of the simulation, dropping all material-specific information.
-    fn to_static(&self) -> StaticSim {
-        let mut state = legacy::SimState::new(self.size(), self.feature_size());
-        state.step_no = self.step_no();
-        Zip::from(ndarray::indices_of(&state.cells)).and(&mut state.e).and(&mut state.h).par_map_assign_into(
-            &mut state.cells,
-            |(z, y, x), e, h| {
-                let idx = Index((x as u32, y as u32, z as u32).into());
-                let cell = self.sample(idx.to_meters(self.feature_size()));
-                *e = cell.e().cast();
-                *h = cell.h().cast();
-                legacy::mat::Static {
-                    conductivity: cell.conductivity().cast(),
-                    m: cell.m().cast(),
+    // TODO: these should all live off-trait as some sort of `SimExt` thing.
+
+    /// Apply `F` to each Cell, and sum the results.
+    fn map_sum<F, Ret>(&self, f: F) -> Ret
+    where
+          F: Fn(&Sample<'_, Self::Real, Self::Material>) -> Ret + Sync,
+          Ret: Sum<Ret> + Send,
+    {
+        self.map_sum_enumerated(|_at: Index, cell| f(cell))
+    }
+
+    fn map_sum_enumerated<C, F, Ret>(&self, f: F) -> Ret
+    where C: Coord,
+          F: Fn(C, &Sample<'_, Self::Real, Self::Material>) -> Ret + Sync,
+          Ret: Sum<Ret> + Send,
+    {
+        let (w, h, d) = (self.width(), self.height(), self.depth());
+        (0..d).into_par_iter().map(
+            |z| (0..h).into_par_iter().map_with(z,
+            |&mut z, y| (0..w).into_par_iter().map_with((z, y),
+            |&mut (z, y), x|
+        {
+            let at = Index(Vec3u::new(x, y, z));
+            f(C::from_index(at, self.feature_size()), &self.sample(at))
+        }))).flatten().flatten().sum()
+    }
+
+    fn volume_of_region<R: Region + ?Sized>(&self, region: &R) -> u32 {
+        self.map_sum_over(region, |_| 1)
+    }
+
+    /// Apply `F` to each Cell, and sum the results.
+    fn map_sum_over<F, Ret, Reg>(&self, region: &Reg, f: F) -> Ret
+    where
+          F: Fn(&Sample<'_, Self::Real, Self::Material>) -> Ret + Sync,
+          Ret: Sum<Ret> + Default + Send,
+          Reg: Region + ?Sized
+    {
+        self.map_sum_over_enumerated(region, |_at: Index, cell| f(cell))
+    }
+
+    fn map_sum_over_enumerated<C, F, Ret, Reg>(&self, region: &Reg, f: F) -> Ret
+    where C: Coord,
+          F: Fn(C, &Sample<'_, Self::Real, Self::Material>) -> Ret + Sync,
+          Ret: Sum<Ret> + Default + Send,
+          Reg: Region + ?Sized,
+    {
+        let (w, h, d) = (self.width(), self.height(), self.depth());
+        (0..d).into_par_iter().map(
+            |z| (0..h).into_par_iter().map_with(z,
+            |&mut z, y| (0..w).into_par_iter().map_with((z, y),
+            |&mut (z, y), x|
+        {
+            let at = Index(Vec3u::new(x, y, z));
+            let meters = at.to_meters(self.feature_size());
+            if region.contains(meters) {
+                f(C::from_index(at, self.feature_size()), &self.sample(at))
+            } else {
+                Default::default()
+            }
+        }))).flatten().flatten().sum()
+    }
+
+    fn current<C: Coord>(&self, c: C) -> Vec3<f32> {
+        self.sample(c).current_density().cast::<f32>() * self.feature_size() * self.feature_size()
+    }
+
+    fn fill_region_using<C, Reg, F, M>(&mut self, region: &Reg, f: F)
+    where
+        Reg: Region,
+        F: Fn(C) -> M,
+        C: Coord,
+        M: Into<Self::Material>
+    {
+        for z in 0..self.depth() {
+            for y in 0..self.height() {
+                for x in 0..self.width() {
+                    let loc = Index((x, y, z).into());
+                    let meters = loc.to_meters(self.feature_size());
+                    if region.contains(meters) {
+                        self.put_material(loc, f(C::from_either(loc, meters)));
+                    }
                }
-            });
-        state
+            }
+        }
+    }
+
+    fn fill_region<Reg: Region, M: Into<Self::Material> + Clone>(&mut self, region: &Reg, mat: M) {
+        self.fill_region_using(region, |_idx: Index| mat.clone());
+    }
+
+    fn examine_region<C, Reg, F>(&self, region: &Reg, mut f: F)
+    where
+        Reg: Region,
+        F: FnMut(C, &Self::Material),
+        C: Coord
+    {
+        for z in 0..self.depth() {
+            for y in 0..self.height() {
+                for x in 0..self.width() {
+                    let loc = Index((x, y, z).into());
+                    let meters = loc.to_meters(self.feature_size());
+                    if region.contains(meters) {
+                        f(C::from_either(loc, meters), self.get_material(loc));
+                    }
+                }
+            }
+        }
+    }
+
+    /// Return true if the given region is filled exclusively with the provided material.
+    fn test_region_filled<Reg: Region, M>(&self, region: &Reg, mat: M) -> bool
+    where
+        M: Into<Self::Material> + Clone,
+        Self::Material: PartialEq,
+    {
+        let mut all = true;
+        self.examine_region(region, |_idx: Index, m: &Self::Material| {
+            all = all && m == &mat.clone().into();
+        });
+        all
+    }
+
+    /// Fill the boundary, where `thickness` describes how far the boundary extends in each
+    /// direction, and `f` takes a vec where each coordinate represents how far into the boundary
+    /// the location being queried is, in each direction.
+    /// e.g. `f((1.0, 0.0, 0.2))` means the location being queried is at either extreme end on the
+    /// x axis, is not inside the y axis boundary, and is 20% of the way from the onset of the z
+    /// boundary to the edge of the z world.
+    fn fill_boundary_using<C, F, M>(&mut self, thickness: C, f: F)
+    where
+        C: Coord,
+        F: Fn(Vec3<f32>) -> M,
+        M: Into<Self::Material>,
+    {
+        // TODO: maybe this function belongs on the Driver?
+        let feat = self.feature_size();
+        let upper_left = thickness.to_index(feat);
+        let size = self.size();
+        let lower_right = size - upper_left - Index::new(1, 1, 1);
+        let region = InvertedRegion::new(Cube::new(upper_left.to_meters(feat), lower_right.to_meters(feat)));
+        self.fill_region_using(&region, |loc: Index| {
+            let depth_x = if loc.x() < upper_left.x() {
+                (upper_left.x() - loc.x()) as f32 / upper_left.x() as f32
+            } else if loc.x() > lower_right.x() {
+                (loc.x() - lower_right.x()) as f32 / upper_left.x() as f32
+            } else {
+                0.0
+            };
+            let depth_y = if loc.y() < upper_left.y() {
+                (upper_left.y() - loc.y()) as f32 / upper_left.y() as f32
+            } else if loc.y() > lower_right.y() {
+                (loc.y() - lower_right.y()) as f32 / upper_left.y() as f32
+            } else {
+                0.0
+            };
+            let depth_z = if loc.z() < upper_left.z() {
+                (upper_left.z() - loc.z()) as f32 / upper_left.z() as f32
+            } else if loc.z() > lower_right.z() {
+                (loc.z() - lower_right.z()) as f32 / upper_left.z() as f32
+            } else {
+                0.0
+            };
+            // println!("{} {}", loc, Vec3::new(depth_x, depth_y, depth_z));
+            f(Vec3::new(depth_x, depth_y, depth_z))
+        });
    }
 }
--- a/crates/coremem/src/sim/spirv/bindings.rs
+++ b/crates/coremem/src/sim/spirv/bindings.rs
@@ -1,94 +0,0 @@
-use serde::{Deserialize, Serialize};
-
-use crate::geom::Index;
-use coremem_types::vec::{Vec3, Vec3u};
-
-/// hide the actual spirv backend structures inside a submodule to make their use/boundary clear.
-mod ffi {
-    pub use spirv_backend::entry_points;
-    pub use spirv_backend::sim::SerializedSimMeta;
-    pub use spirv_backend::support::Optional;
-}
-
-// conversion traits for types defined cross-lib
-pub trait IntoFfi {
-    type Ffi;
-    fn into_ffi(self) -> Self::Ffi;
-}
-
-pub trait IntoLib {
-    type Lib;
-    fn into_lib(self) -> Self::Lib;
-}
-
-pub trait Identity { }
-impl<T: Identity> IntoFfi for T {
-    type Ffi = Self;
-    fn into_ffi(self) -> Self::Ffi {
-        self
-    }
-}
-impl<T: Identity> IntoLib for T {
-    type Lib = Self;
-    fn into_lib(self) -> Self::Lib {
-        self
-    }
-}
-
-impl Identity for f32 {}
-impl<'a> Identity for &'a str {}
-impl<T0: Identity, T1: Identity> Identity for (T0, T1) {}
-impl Identity for Vec3u {}
-impl<T: Identity> Identity for Vec3<T> {}
-
-impl<L: IntoFfi> IntoFfi for Option<L>
-    where L::Ffi: Default
-{
-    type Ffi = ffi::Optional<L::Ffi>;
-    fn into_ffi(self) -> Self::Ffi {
-        match self {
-            Some(s) => ffi::Optional::some(s.into_ffi()),
-            None => ffi::Optional::none(),
-        }
-    }
-}
-
-impl<F: Copy + IntoLib> IntoLib for ffi::Optional<F> {
-    type Lib = Option<F::Lib>;
-    fn into_lib(self) -> Self::Lib {
-        if self.is_some() {
-            Some(self.unwrap().into_lib())
-        } else {
-            None
-        }
-    }
-}
-
-
-// TODO: if we lift this type out of the spirv crate we can derive serde based on a feature flag
-// this is bitwise- and type-compatible with the spirv SimMeta, except we need serde traits
-#[derive(Clone, Default, Serialize, Deserialize)]
-pub struct SimMeta<R> {
-    pub(crate) dim: Index,
-    pub(crate) inv_feature_size: R,
-    pub(crate) time_step: R,
-    pub(crate) feature_size: R,
-}
-
-impl<R: IntoFfi> IntoFfi for SimMeta<R> {
-    type Ffi = ffi::SerializedSimMeta<R::Ffi>;
-    fn into_ffi(self) -> Self::Ffi {
-        Self::Ffi {
-            dim: self.dim.0.into_ffi(),
-            inv_feature_size: self.inv_feature_size.into_ffi(),
-            time_step: self.time_step.into_ffi(),
-            feature_size: self.feature_size.into_ffi(),
-        }
-    }
-}
-
-// FUNCTION BINDINGS
-pub fn entry_points<L: 'static>() -> Option<(&'static str, &'static str)>
-{
-    ffi::entry_points::<L>().into_lib()
-}
--- a/crates/coremem/src/sim/spirv/cpu.rs
+++ b/crates/coremem/src/sim/spirv/cpu.rs
@@ -0,0 +1,46 @@
+use coremem_cross::mat::Material;
+use coremem_cross::real::Real;
+use coremem_cross::step::{SimMeta, StepEContext, StepHContext};
+use coremem_cross::vec::{Vec3, Vec3u};
+
+use super::SimBackend;
+
+#[derive(Default)]
+pub struct CpuBackend;
+
+impl<R: Real, M: Material<R>> SimBackend<R, M> for CpuBackend {
+    fn step_n(
+        &mut self,
+        meta: SimMeta<R>,
+        mat: &[M],
+        stim_e: &[Vec3<R>],
+        stim_h: &[Vec3<R>],
+        e: &mut [Vec3<R>],
+        h: &mut [Vec3<R>],
+        m: &mut [Vec3<R>],
+        num_steps: u32,
+    ) {
+        for _ in 0..num_steps {
+            // step E field
+            apply_all_cells(meta.dim, |idx| {
+                StepEContext::step_flat_view(meta, mat, stim_e, e, h, idx);
+            });
+
+            // step H field
+            apply_all_cells(meta.dim, |idx| {
+                StepHContext::step_flat_view(meta, mat, stim_h, e, h, m, idx);
+            });
+        }
+    }
+}
+
+fn apply_all_cells<F: FnMut(Vec3u)>(dim: Vec3u, mut f: F) {
+    for z in 0..dim.z() {
+        for y in 0..dim.y() {
+            for x in 0..dim.x() {
+                f(Vec3u::new(x, y, z));
+            }
+        }
+    }
+}
+
--- a/crates/coremem/src/sim/spirv/gpu.rs
+++ b/crates/coremem/src/sim/spirv/gpu.rs
@@ -0,0 +1,444 @@
+use futures::FutureExt as _;
+use log::info;
+use std::borrow::Cow;
+use std::num::NonZeroU64;
+use wgpu;
+use wgpu::util::DeviceExt as _;
+
+use coremem_cross::vec::{Vec3, Vec3u};
+use coremem_cross::step::SimMeta;
+
+use spirv_backend::HasEntryPoints;
+
+use super::SimBackend;
+
+#[derive(Default)]
+pub struct WgpuBackend {
+    handles: Option<(&'static str /* step_h */, &'static str /* step_e */, WgpuHandles)>,
+}
+
+struct WgpuHandles {
+    step_bind_group_layout: wgpu::BindGroupLayout,
+    step_e_pipeline: wgpu::ComputePipeline,
+    step_h_pipeline: wgpu::ComputePipeline,
+    device: wgpu::Device,
+    queue: wgpu::Queue,
+}
+
+impl WgpuHandles {
+    fn open<R, M: HasEntryPoints<R>>(dim: Vec3u) -> Self {
+        info!("WgpuHandles::open({})", dim);
+        use std::mem::size_of;
+        let volume = dim.product_sum_usize() as u64;
+        let max_elem_size = size_of::<M>().max(size_of::<Vec3<R>>());
+        let max_array_size = volume * max_elem_size as u64;
+        let max_buf_size = max_array_size + 0x1000; // allow some overhead
+
+        let (device, queue) = futures::executor::block_on(open_device(max_buf_size));
+        let shader_binary = get_shader();
+        let shader_module = unsafe { device.create_shader_module_spirv(&shader_binary) };
+        let (step_bind_group_layout, step_h_pipeline, step_e_pipeline) = make_pipelines(
+            &device, &shader_module, M::step_h(), M::step_e()
+        );
+        WgpuHandles {
+            step_bind_group_layout,
+            step_h_pipeline,
+            step_e_pipeline,
+            device,
+            queue,
+        }
+    }
+}
+
+// TODO: these bounds aren't 100% right. we're sending R and M over to the GPU by a bitwise copy.
+// that probably means the types should be Send + Copy
+impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBackend {
+    fn step_n(
+        &mut self,
+        meta: SimMeta<R>,
+        mat: &[M],
+        stim_cpu_e: &[Vec3<R>],
+        stim_cpu_h: &[Vec3<R>],
+        e: &mut [Vec3<R>],
+        h: &mut [Vec3<R>],
+        m: &mut [Vec3<R>],
+        num_steps: u32,
+    ) {
+        let field_bytes = meta.dim.product_sum() as usize * std::mem::size_of::<Vec3<f32>>();
+
+        let (step_h, step_e, handles) = self.handles.get_or_insert_with(|| (
+            M::step_h(),
+            M::step_e(),
+            WgpuHandles::open::<R, M>(meta.dim)
+        ));
+        // if device is opened, make sure we're open for the right types
+        assert_eq!(*step_h, M::step_h());
+        assert_eq!(*step_e, M::step_e());
+        let device = &handles.device;
+        let queue = &handles.queue;
+        let step_bind_group_layout = &handles.step_bind_group_layout;
+        let step_e_pipeline = &handles.step_e_pipeline;
+        let step_h_pipeline = &handles.step_h_pipeline;
+        let sim_meta_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("gpu-side simulation metadata"),
+            contents: to_bytes(&[meta][..]),
+            usage: wgpu::BufferUsages::STORAGE,
+        });
+
+        let stim_e_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("gpu-side stimulus e field"),
+            contents: to_bytes(stim_cpu_e),
+            usage: wgpu::BufferUsages::STORAGE
+        });
+
+        let stim_h_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("gpu-side stimulus h field"),
+            contents: to_bytes(stim_cpu_h),
+            usage: wgpu::BufferUsages::STORAGE
+        });
+
+        let mat_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("gpu-side materials matrix"),
+            contents: to_bytes(mat),
+            // Can be used by the GPU and copied back to the CPU
+            usage: wgpu::BufferUsages::STORAGE,
+        });
+
+        let e_field_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("gpu-side in/out e field"),
+            contents: to_bytes(e),
+            usage: wgpu::BufferUsages::STORAGE.union(wgpu::BufferUsages::COPY_SRC),
+        });
+
+        let h_field_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("gpu-side in/out h field"),
+            contents: to_bytes(h),
+            // Can be used by the GPU and copied back to the CPU
+            usage: wgpu::BufferUsages::STORAGE.union(wgpu::BufferUsages::COPY_SRC),
+        });
+
+        let m_field_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("gpu-side in/out m field"),
+            contents: to_bytes(m),
+            // Can be used by the GPU and copied back to the CPU
+            usage: wgpu::BufferUsages::STORAGE.union(wgpu::BufferUsages::COPY_SRC),
+        });
+
+        let e_readback_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("cpu-side copy of e output buffer"),
+            size: field_bytes as wgpu::BufferAddress,
+            // Can be read to the CPU, and can be copied from the shader's storage buffer
+            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+
+        let h_readback_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("cpu-side copy of h output buffer"),
+            size: field_bytes as wgpu::BufferAddress,
+            // Can be read to the CPU, and can be copied from the shader's storage buffer
+            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+
+        let m_readback_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("cpu-side copy of m output buffer"),
+            size: field_bytes as wgpu::BufferAddress,
+            // Can be read to the CPU, and can be copied from the shader's storage buffer
+            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+
+        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: None,
+            layout: &step_bind_group_layout,
+            entries: &[
+                wgpu::BindGroupEntry {
+                    binding: 0,
+                    resource: sim_meta_buffer.as_entire_binding(),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 1,
+                    resource: stim_e_buffer.as_entire_binding(),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 2,
+                    resource: stim_h_buffer.as_entire_binding(),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 3,
+                    resource: mat_buffer.as_entire_binding(),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 4,
+                    resource: e_field_buffer.as_entire_binding(),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 5,
+                    resource: h_field_buffer.as_entire_binding(),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 6,
+                    resource: m_field_buffer.as_entire_binding(),
+                },
+            ],
+        });
+
+        let workgroups = ((meta.dim.x()+3) / 4, (meta.dim.y()+3) / 4, (meta.dim.z()+3) / 4);
+
+        let mut encoder =
+            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+
+        for _ in 0..num_steps {
+            {
+                let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
+                cpass.set_bind_group(0, &bind_group, &[]);
+                cpass.set_pipeline(&step_e_pipeline);
+                cpass.dispatch(workgroups.0, workgroups.1, workgroups.2);
+            }
+
+            {
+                let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
+                cpass.set_bind_group(0, &bind_group, &[]);
+                cpass.set_pipeline(&step_h_pipeline);
+                cpass.dispatch(workgroups.0, workgroups.1, workgroups.2);
+            }
+        }
+
+        encoder.copy_buffer_to_buffer(
+            &e_field_buffer,
+            0,
+            &e_readback_buffer,
+            0,
+            field_bytes as u64,
+        );
+
+        encoder.copy_buffer_to_buffer(
+            &h_field_buffer,
+            0,
+            &h_readback_buffer,
+            0,
+            field_bytes as u64,
+        );
+
+        encoder.copy_buffer_to_buffer(
+            &m_field_buffer,
+            0,
+            &m_readback_buffer,
+            0,
+            field_bytes as u64,
+        );
+
+        queue.submit(Some(encoder.finish()));
+
+        let e_readback_slice = e_readback_buffer.slice(..);
+        let e_readback_future = e_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
+                e.copy_from_slice(unsafe {
+                    from_bytes(e_readback_slice.get_mapped_range().as_ref())
+                });
+                e_readback_buffer.unmap();
+            });
+
+        let h_readback_slice = h_readback_buffer.slice(..);
+        let h_readback_future = h_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
+            h.copy_from_slice(unsafe {
+                from_bytes(h_readback_slice.get_mapped_range().as_ref())
+            });
+            h_readback_buffer.unmap();
+        });
+
+        let m_readback_slice = m_readback_buffer.slice(..);
+        let m_readback_future = m_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
+            m.copy_from_slice(unsafe {
+                from_bytes(m_readback_slice.get_mapped_range().as_ref())
+            });
+            m_readback_buffer.unmap();
+        });
+
+        device.poll(wgpu::Maintain::Wait);
+
+        futures::executor::block_on(futures::future::join(
+            e_readback_future, futures::future::join(
+                h_readback_future, m_readback_future)));
+    }
+}
+
+/// Convert an arbitrary slice into a byte slice
+fn to_bytes<T>(slice: &[T]) -> &[u8] {
+    unsafe {
+        std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * std::mem::size_of::<T>())
+    }
+}
+
+/// Convert a byte slice into a T slice
+unsafe fn from_bytes<T>(slice: &[u8]) -> &[T] {
+    let elem_size = std::mem::size_of::<T>();
+    let new_len = slice.len() / elem_size;
+    assert_eq!(new_len * elem_size, slice.len());
+    std::slice::from_raw_parts(slice.as_ptr() as *const T, new_len)
+}
+
+/// Loads the shader
+fn get_shader() -> wgpu::ShaderModuleDescriptorSpirV<'static> {
+    let data = spirv_backend_runner::spirv_module();
+    let spirv = Cow::Owned(wgpu::util::make_spirv_raw(&data).into_owned());
+
+    wgpu::ShaderModuleDescriptorSpirV {
+        label: None,
+        source: spirv,
+    }
+}
+
+async fn open_device(max_buf_size: u64) -> (wgpu::Device, wgpu::Queue) {
+    // based on rust-gpu/examples/runners/wgpu/src/compute.rs:start_internal
+    let instance = wgpu::Instance::new(wgpu::Backends::PRIMARY);
+    info!("open_device: got instance");
+    let adapter = instance
+        .request_adapter(&wgpu::RequestAdapterOptions {
+            power_preference: wgpu::PowerPreference::HighPerformance,
+            force_fallback_adapter: false,
+            compatible_surface: None,
+        })
+        .await
+        .expect("Failed to find an appropriate adapter");
+    info!("open_device: got adapter");
+
+    // XXX not all adapters will support non-default limits, and it could
+    // cause perf degradations even on the ones that do. May want to consider
+    // folding some buffers together to avoid this.
+    let mut limits = wgpu::Limits::default();
+    //limits.max_bind_groups = 5;
+    //limits.max_dynamic_storage_buffers_per_pipeline_layout = 5;
+    limits.max_storage_buffers_per_shader_stage = 7;
+    //limits.max_storage_buffer_binding_size = 128 MiB.
+    //limits.max_storage_buffer_binding_size = 1024 * (1 << 20);
+    limits.max_storage_buffer_binding_size = max_buf_size.try_into().unwrap();
+
+    let (device, queue) = adapter
+        .request_device(
+            &wgpu::DeviceDescriptor {
+                label: None,
+                features: wgpu::Features::SPIRV_SHADER_PASSTHROUGH,
+                limits,
+            },
+            None,
+        )
+        .await
+        .expect("Failed to create device");
+    info!("open_device: got device");
+    (device, queue)
+}
+
+fn make_pipelines(
+    device: &wgpu::Device,
+    shader_module: &wgpu::ShaderModule,
+    entry_step_h: &'static str,
+    entry_step_e: &'static str
+) -> (
+    wgpu::BindGroupLayout, wgpu::ComputePipeline, wgpu::ComputePipeline
+) {
+    let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+        label: None,
+        entries: &[
+            wgpu::BindGroupLayoutEntry {
+                // meta
+                binding: 0,
+                count: None,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    has_dynamic_offset: false,
+                    min_binding_size: Some(NonZeroU64::new(1).unwrap()),
+                    ty: wgpu::BufferBindingType::Storage { read_only: true },
+                },
+            },
+            wgpu::BindGroupLayoutEntry {
+                // stimulus(e)
+                binding: 1,
+                count: None,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    has_dynamic_offset: false,
+                    min_binding_size: Some(NonZeroU64::new(1).unwrap()),
+                    ty: wgpu::BufferBindingType::Storage { read_only: true },
+                },
+            },
+            wgpu::BindGroupLayoutEntry {
+                // stimulus(h)
+                binding: 2,
+                count: None,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    has_dynamic_offset: false,
+                    min_binding_size: Some(NonZeroU64::new(1).unwrap()),
+                    ty: wgpu::BufferBindingType::Storage { read_only: true },
+                },
+            },
+            wgpu::BindGroupLayoutEntry {
+                // materials
+                binding: 3,
+                count: None,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    has_dynamic_offset: false,
+                    min_binding_size: Some(NonZeroU64::new(1).unwrap()),
+                    ty: wgpu::BufferBindingType::Storage { read_only: true },
+                },
+            },
+            wgpu::BindGroupLayoutEntry {
+                // e field
+                binding: 4,
+                count: None,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    has_dynamic_offset: false,
+                    min_binding_size: Some(NonZeroU64::new(1).unwrap()),
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                },
+            },
+            wgpu::BindGroupLayoutEntry {
+                // h field
+                binding: 5,
+                count: None,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    has_dynamic_offset: false,
+                    min_binding_size: Some(NonZeroU64::new(1).unwrap()),
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                },
+            },
+            wgpu::BindGroupLayoutEntry {
+                // m field
+                binding: 6,
+                count: None,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    has_dynamic_offset: false,
+                    min_binding_size: Some(NonZeroU64::new(1).unwrap()),
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                },
+            },
+        ],
+    });
+
+    let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+        label: None,
+        bind_group_layouts: &[&bind_group_layout],
+        push_constant_ranges: &[],
+    });
+
+    let compute_step_h_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+        label: None,
+        layout: Some(&pipeline_layout),
+        module: shader_module,
+        entry_point: entry_step_h,
+    });
+
+    let compute_step_e_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+        label: None,
+        layout: Some(&pipeline_layout),
+        module: shader_module,
+        entry_point: entry_step_e,
+    });
+
+    (bind_group_layout, compute_step_h_pipeline, compute_step_e_pipeline)
+}
+
--- a/crates/coremem/src/sim/spirv/mod.rs
+++ b/crates/coremem/src/sim/spirv/mod.rs
--- a/crates/coremem/src/stim.rs
+++ b/crates/coremem/src/stim.rs
@@ -1,12 +1,24 @@
 use crate::real::*;
-use crate::types::vec::Vec3;
+use crate::cross::vec::Vec3;
 use crate::geom::{Meters, Region};
 use rand;

-type Fields = (Vec3<f32>, Vec3<f32>);
+/// field densities
+#[derive(Clone, Copy, Debug, Default, PartialEq)]
+pub struct Fields {
+    pub e: Vec3<f32>,
+    pub h: Vec3<f32>,
+}
+
+/// field magnitude densities (really, signed magnitude)
+#[derive(Clone, Copy, Debug, Default, PartialEq)]
+pub struct FieldMags {
+    pub e: f32,
+    pub h: f32,
+}

 pub trait AbstractStimulus: Sync {
-    // TODO: might be cleaner to return some `Fields` type instead of a tuple
+    // TODO: using floats for time and position is not great.
    /// Return the (E, H) field which should be added PER-SECOND to the provided position/time.
    fn at(&self, t_sec: f32, pos: Meters) -> Fields;
 }
@@ -19,13 +31,13 @@ pub trait AbstractStimulus: Sync {

 impl<T: AbstractStimulus> AbstractStimulus for Vec<T> {
    fn at(&self, t_sec: f32, pos: Meters) -> Fields {
-        let (mut e, mut h) = Fields::default();
+        let mut fields = Fields::default();
        for i in self {
-            let (de, dh) = i.at(t_sec, pos);
-            e += de;
-            h += dh;
+            let Fields { e, h } = i.at(t_sec, pos);
+            fields.e += e;
+            fields.h += h;
        }
-        (e, h)
+        fields
    }
 }

@@ -64,7 +76,7 @@ impl AbstractStimulus for UniformStimulus {
 impl TimeVarying for UniformStimulus {}
 impl TimeVarying3 for UniformStimulus {
    fn at(&self, _t_sec: f32) -> Fields {
-        (self.e, self.h)
+        Fields { e: self.e, h: self.h }
    }
 }

@@ -100,7 +112,10 @@ impl RngStimulus {

 impl AbstractStimulus for RngStimulus {
    fn at(&self, t_sec: f32, pos: Meters) -> Fields {
-        (self.gen(t_sec, pos, self.e_scale, 0), self.gen(t_sec, pos, self.h_scale, 0x7de3))
+        Fields {
+            e: self.gen(t_sec, pos, self.e_scale, 0),
+            h: self.gen(t_sec, pos, self.h_scale, 0x7de3)
+        }
    }
 }

@@ -148,12 +163,16 @@ impl<R, T> CurlStimulus<R, T> {
 impl<R: Region + Sync, T: TimeVarying1 + Sync> AbstractStimulus for CurlStimulus<R, T> {
    fn at(&self, t_sec: f32, pos: Meters) -> Fields {
        if self.region.contains(pos) {
-            let (amt_e, amt_h) = self.stim.at(t_sec);
+            let FieldMags { e, h } = self.stim.at(t_sec);
            let from_center_to_point = *pos - *self.center;
+            // TODO: is this inverted?
            let rotational = from_center_to_point.cross(*self.axis);
-            let impulse_e = rotational.with_mag(amt_e.cast());
-            let impulse_h = rotational.with_mag(amt_h.cast());
-            (impulse_e, impulse_h)
+            let impulse_e = rotational.with_mag(e.cast()).unwrap_or_default();
+            let impulse_h = rotational.with_mag(h.cast()).unwrap_or_default();
+            Fields {
+                e: impulse_e,
+                h: impulse_h,
+            }
        } else {
            Fields::default()
        }
@@ -171,7 +190,7 @@ pub trait TimeVarying: Sized {

 pub trait TimeVarying1: TimeVarying {
    /// Retrieve the (E, H) impulse to apply PER-SECOND at the provided time (in seconds).
-    fn at(&self, t_sec: f32) -> (f32, f32);
+    fn at(&self, t_sec: f32) -> FieldMags;
 }

 pub trait TimeVarying3: TimeVarying {
@@ -182,8 +201,8 @@ pub trait TimeVarying3: TimeVarying {
 // assumed to represent the E field
 impl TimeVarying for f32 {}
 impl TimeVarying1 for f32 {
-    fn at(&self, _t_sec: f32) -> (f32, f32) {
-        (*self, 0.0)
+    fn at(&self, _t_sec: f32) -> FieldMags {
+        FieldMags { e: *self, h: 0.0 }
    }
 }

@@ -226,20 +245,20 @@ impl<A> Sinusoid<A> {
 impl<A> TimeVarying for Sinusoid<A> {}

 impl TimeVarying1 for Sinusoid1 {
-    fn at(&self, t_sec: f32) -> (f32, f32) {
-        (
-            self.amp * (t_sec * self.omega).sin(),
-            0.0,
-        )
+    fn at(&self, t_sec: f32) -> FieldMags {
+        FieldMags {
+            e: self.amp * (t_sec * self.omega).sin(),
+            h: 0.0,
+        }
    }
 }

 impl TimeVarying3 for Sinusoid3 {
    fn at(&self, t_sec: f32) -> Fields {
-        (
-            self.amp * (t_sec * self.omega).sin(),
-            Vec3::zero(),
-        )
+        Fields {
+            e: self.amp * (t_sec * self.omega).sin(),
+            h: Vec3::zero(),
+        }
    }
 }

@@ -268,20 +287,20 @@ impl<A> Exp<A> {
 impl<A> TimeVarying for Exp<A> {}

 impl TimeVarying1 for Exp1 {
-    fn at(&self, t_sec: f32) -> (f32, f32) {
-        (
-            self.amp * (t_sec * -self.tau).exp(),
-            0.0,
-        )
+    fn at(&self, t_sec: f32) -> FieldMags {
+        FieldMags {
+            e: self.amp * (t_sec * -self.tau).exp(),
+            h: 0.0,
+        }
    }
 }

 impl TimeVarying3 for Exp3 {
    fn at(&self, t_sec: f32) -> Fields {
-        (
-            self.amp * (t_sec * -self.tau).exp(),
-            Vec3::zero(),
-        )
+        Fields {
+            e: self.amp * (t_sec * -self.tau).exp(),
+            h: Vec3::zero(),
+        }
    }
 }

@@ -300,7 +319,7 @@ impl<T> Gated<T> {

 impl<T> TimeVarying for Gated<T> {}
 impl<T: TimeVarying1> TimeVarying1 for Gated<T> {
-    fn at(&self, t_sec: f32) -> (f32, f32) {
+    fn at(&self, t_sec: f32) -> FieldMags {
        if (self.start..self.end).contains(&t_sec) {
            self.inner.at(t_sec)
        } else {
@@ -333,7 +352,7 @@ impl<T> Shifted<T> {

 impl<T> TimeVarying for Shifted<T> {}
 impl<T: TimeVarying1> TimeVarying1 for Shifted<T> {
-    fn at(&self, t_sec: f32) -> (f32, f32) {
+    fn at(&self, t_sec: f32) -> FieldMags {
        self.inner.at(t_sec - self.start_at)
    }
 }
@@ -353,9 +372,9 @@ mod test {
            let x = $x;
            let e = $e;
            let h = $h;
-            let diff_e = (x.0 - e).mag();
+            let diff_e = (x.e - e).mag();
            assert!(diff_e <= 0.001, "{:?} != {:?}", x, e);
-            let diff_h = (x.1 - h).mag();
+            let diff_h = (x.h - h).mag();
            assert!(diff_h <= 0.001, "{:?} != {:?}", x, h);
        }
    }
@@ -363,7 +382,7 @@ mod test {
    #[test]
    fn sinusoid3() {
        let s = Sinusoid3::new(Vec3::new(10.0, 1.0, -100.0), 1000.0);
-        assert_eq!(s.at(0.0), (Vec3::zero(), Vec3::zero()));
+        assert_eq!(s.at(0.0), Fields::default());
        assert_approx_eq!(s.at(0.00025),
            Vec3::new(10.0, 1.0, -100.0), Vec3::zero()
        );
@@ -374,7 +393,7 @@ mod test {
    #[test]
    fn sinusoid3_from_wavelength() {
        let s = Sinusoid3::from_wavelength(Vec3::new(10.0, 1.0, -100.0), 0.001);
-        assert_eq!(s.at(0.0), (Vec3::zero(), Vec3::zero()));
+        assert_eq!(s.at(0.0), Fields::default());
        assert_approx_eq!(s.at(0.00025), Vec3::new(10.0, 1.0, -100.0), Vec3::zero());
        assert_approx_eq!(s.at(0.00050), Vec3::zero(), Vec3::zero());
        assert_approx_eq!(s.at(0.00075), Vec3::new(-10.0, -1.0, 100.0), Vec3::zero());
--- a/crates/coremem/src/util/mod.rs
+++ b/crates/coremem/src/util/mod.rs
@@ -1 +0,0 @@
-pub mod cache;
--- a/crates/cross/Cargo.toml
+++ b/crates/cross/Cargo.toml
@@ -1,10 +1,11 @@
 [package]
-name = "coremem_types"
-version = "0.1.0"
+name = "coremem_cross"
+version = "0.2.0"
 authors = ["Colin <colin@uninsane.org>"]
 edition = "2021"

 [features]
+# some functionality does not compile for the spirv target, so we feature gate these.
 serde = [ "dep:serde" ]
 fmt = []

--- a/crates/cross/src/compound/enumerated.rs
+++ b/crates/cross/src/compound/enumerated.rs
--- a/crates/cross/src/compound/list/flat.rs
+++ b/crates/cross/src/compound/list/flat.rs
--- a/crates/cross/src/compound/list/linked.rs
+++ b/crates/cross/src/compound/list/linked.rs
--- a/crates/cross/src/compound/list/mod.rs
+++ b/crates/cross/src/compound/list/mod.rs
--- a/crates/cross/src/compound/list/tuple_consumer.rs
+++ b/crates/cross/src/compound/list/tuple_consumer.rs
--- a/crates/cross/src/compound/mod.rs
+++ b/crates/cross/src/compound/mod.rs
@@ -1,3 +1,6 @@
 pub mod enumerated;
 pub mod list;
+mod optional;
 pub mod peano;
+
+pub use optional::Optional;
--- a/crates/cross/src/compound/optional.rs
+++ b/crates/cross/src/compound/optional.rs
@@ -0,0 +1,86 @@
+#[cfg(feature = "serde")]
+use serde::{Serialize, Deserialize};
+
+/// This is a spirv-compatible option type.
+/// The native rust Option type produces invalid spirv due to its enum nature; this custom option
+/// type creates code which will actually compile.
+#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
+#[cfg_attr(feature = "fmt", derive(Debug))]
+#[derive(Copy, Clone, PartialEq)]
+pub struct Optional<T> {
+    // XXX: not a bool, because: "entrypoint parameter cannot contain a boolean"
+    present: u8,
+    data: T,
+}
+
+impl<T> Optional<T> {
+    pub fn some(data: T) -> Self {
+        Self {
+            present: 1,
+            data,
+        }
+    }
+
+    pub fn explicit_none(data: T) -> Self {
+        Self {
+            present: 0,
+            data,
+        }
+    }
+
+    pub fn is_some(self) -> bool {
+        self.present != 0
+    }
+
+    pub fn unwrap(self) -> T {
+        debug_assert!(self.present != 0);
+        self.data
+    }
+
+    pub fn map<U: Default, F: FnOnce(T) -> U>(self, f: F) -> Optional<U> {
+        self.and_then(|inner| Optional::some(f(inner)))
+    }
+
+    pub fn and_then<U: Default, F: FnOnce(T) -> Optional<U>>(self, f: F) -> Optional<U> {
+        if self.present != 0 {
+            f(self.data)
+        } else {
+            Optional::none()
+        }
+    }
+
+    pub fn unwrap_or(self, default: T) -> T {
+        if self.present != 0 {
+            self.data
+        } else {
+            default
+        }
+    }
+}
+
+impl<T: Default> Optional<T> {
+    pub fn none() -> Self {
+        Self::explicit_none(Default::default())
+    }
+
+    pub fn unwrap_or_default(self) -> T {
+        self.unwrap_or(Default::default())
+    }
+}
+
+impl<T: Default> Default for Optional<T> {
+    fn default() -> Self {
+        Self::none()
+    }
+}
+
+impl<T0: Default, T1: Default> Optional<(T0, T1)> {
+    pub fn flatten((f0, f1): (Optional<T0>, Optional<T1>)) -> Self {
+        if f0.present != 0 && f1.present != 0 {
+            Optional::some((f0.data, f1.data))
+        } else {
+            Optional::none()
+        }
+    }
+}
+
--- a/crates/cross/src/compound/peano.rs
+++ b/crates/cross/src/compound/peano.rs
--- a/crates/cross/src/dim.rs
+++ b/crates/cross/src/dim.rs
@@ -0,0 +1,66 @@
+use core::ops::{Index, IndexMut};
+
+use crate::vec::Vec3u;
+
+/// use this to wrap a flat region of memory into something which can be indexed by coordinates in
+/// 3d space.
+pub struct DimensionedSlice<T> {
+    dim: Vec3u,
+    items: T,
+}
+
+impl<T> DimensionedSlice<T> {
+    pub fn new(dim: Vec3u, items: T) -> Self {
+        Self { dim, items }
+    }
+}
+
+impl<'a, T: Index<usize> + ?Sized> Index<Vec3u> for DimensionedSlice<&'a T> {
+    type Output=T::Output;
+
+    fn index(&self, idx: Vec3u) -> &Self::Output {
+        let idx = index(idx, self.dim);
+        &self.items[idx]
+    }
+}
+
+impl<'a, T: Index<usize> + ?Sized> Index<Vec3u> for DimensionedSlice<&'a mut T> {
+    type Output=T::Output;
+
+    fn index(&self, idx: Vec3u) -> &Self::Output {
+        let idx = index(idx, self.dim);
+        &self.items[idx]
+    }
+}
+
+impl<'a, T: IndexMut<usize> + ?Sized> IndexMut<Vec3u> for DimensionedSlice<&'a mut T> {
+    fn index_mut(&mut self, idx: Vec3u) -> &mut Self::Output {
+        let idx = index(idx, self.dim);
+        &mut self.items[idx]
+    }
+}
+
+fn index(loc: Vec3u, dim: Vec3u) -> usize {
+    ((loc.z()*dim.y() + loc.y())*dim.x() + loc.x()) as usize
+}
+
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    #[test]
+    fn test_index() {
+        let dim = Vec3u::new(2, 3, 7);
+        assert_eq!(index(Vec3u::new(0, 0, 0), dim), 0);
+        assert_eq!(index(Vec3u::new(1, 0, 0), dim), 1);
+        assert_eq!(index(Vec3u::new(0, 1, 0), dim), 2);
+        assert_eq!(index(Vec3u::new(1, 1, 0), dim), 3);
+        assert_eq!(index(Vec3u::new(0, 2, 0), dim), 4);
+        assert_eq!(index(Vec3u::new(0, 0, 1), dim), 6);
+        assert_eq!(index(Vec3u::new(1, 0, 1), dim), 7);
+        assert_eq!(index(Vec3u::new(0, 1, 1), dim), 8);
+        assert_eq!(index(Vec3u::new(1, 2, 1), dim), 11);
+        assert_eq!(index(Vec3u::new(1, 2, 2), dim), 17);
+    }
+}
+
--- a/crates/cross/src/lib.rs
+++ b/crates/cross/src/lib.rs
@@ -0,0 +1,11 @@
+#![no_std]
+#![feature(core_intrinsics)]
+
+pub mod compound;
+pub mod dim;
+pub mod mat;
+pub mod real;
+pub mod step;
+pub mod vec;
+// private because `vec` re-exports to important vecu constructs
+mod vecu;
--- a/crates/cross/src/mat/compound.rs
+++ b/crates/cross/src/mat/compound.rs
@@ -57,6 +57,24 @@ impl<R: Real, P: Peano, T: Material<R>> VariantHandler<P, T, Vec3<R>> for MoveBV
    }
 }

+/// invokes Into<T>::into on any variant
+struct IntoDispatcher;
+impl<P: Peano, T: Into<I>, I> VariantHandler<P, T, I> for IntoDispatcher {
+    fn call(self, v: T) -> I {
+        v.into()
+    }
+}
+
+impl<R, M0, M1> Into<FullyGenericMaterial<R>> for DiscrMat2<M0, M1>
+where
+    M0: DiscriminantCodable<P2> + Into<FullyGenericMaterial<R>> + Copy,
+    M1: Into<FullyGenericMaterial<R>> + Copy,
+{
+    fn into(self) -> FullyGenericMaterial<R> {
+        self.0.dispatch(IntoDispatcher)
+    }
+}
+
 impl<R: Real, M0, M1> Material<R> for DiscrMat2<M0, M1>
 where
    M0: DiscriminantCodable<P2> + Material<R> + Copy,
@@ -70,6 +88,17 @@ where
    }
 }

+impl<R, M0, M1, M2> Into<FullyGenericMaterial<R>> for DiscrMat3<M0, M1, M2>
+where
+    M0: DiscriminantCodable<P3> + Into<FullyGenericMaterial<R>> + Copy,
+    M1: Into<FullyGenericMaterial<R>> + Copy,
+    M2: Into<FullyGenericMaterial<R>> + Copy,
+{
+    fn into(self) -> FullyGenericMaterial<R> {
+        self.0.dispatch(IntoDispatcher)
+    }
+}
+
 impl<R: Real, M0, M1, M2> Material<R> for DiscrMat3<M0, M1, M2>
 where
    M0: DiscriminantCodable<P3> + Material<R> + Copy,
@@ -133,7 +162,7 @@ pub struct GenericMagnetic<R>(DiscrMat3<MBPgram<R>, MHPgram<R>, Vacuum>);
 impl<R: Real> Default for GenericMagnetic<R> {
    fn default() -> Self {
        // N.B.: the default is not the first variant.
-        // we order the variants specifically so that the first one can store the descriminant, but
+        // we order the variants specifically so that the first one can store the discriminant, but
        // we NEED Vacuum to be the default.
        Vacuum.into()
    }
@@ -186,6 +215,11 @@ impl<R: Real> From<MHPgram<R>> for FullyGenericMaterial<R> {
        Self::new(Default::default(), mat.into())
    }
 }
+impl<R: Real> From<Vacuum> for FullyGenericMaterial<R> {
+    fn from(mat: Vacuum) -> Self {
+        Self::new(Default::default(), mat.into())
+    }
+}
 impl<R: Real> From<IsomorphicConductor<R>> for FullyGenericMaterial<R> {
    fn from(mat: IsomorphicConductor<R>) -> Self {
        let mat: AnisomorphicConductor<R> = mat.into();
--- a/crates/cross/src/mat/conductor.rs
+++ b/crates/cross/src/mat/conductor.rs
--- a/crates/cross/src/mat/mb_pgram.rs
+++ b/crates/cross/src/mat/mb_pgram.rs
--- a/crates/cross/src/mat/mh_pgram.rs
+++ b/crates/cross/src/mat/mh_pgram.rs
--- a/crates/cross/src/mat/mod.rs
+++ b/crates/cross/src/mat/mod.rs
--- a/crates/cross/src/real.rs
+++ b/crates/cross/src/real.rs
@@ -20,11 +20,19 @@ pub trait ToFloat {
    }
 }

-#[cfg(feature = "fmt")]
-pub trait RealFeatures: fmt::LowerExp + fmt::Display + fmt::Debug {}
-#[cfg(not(feature = "fmt"))]
+
+#[cfg(all(not(feature = "fmt"), not(feature = "serde")))]
 pub trait RealFeatures {}

+#[cfg(all(not(feature = "fmt"), feature = "serde"))]
+pub trait RealFeatures: Serialize + for<'a> Deserialize<'a> {}
+
+#[cfg(all(feature = "fmt", not(feature = "serde")))]
+pub trait RealFeatures: fmt::LowerExp + fmt::Display + fmt::Debug {}
+
+#[cfg(all(feature = "fmt", feature = "serde"))]
+pub trait RealFeatures: fmt::LowerExp + fmt::Display + fmt::Debug + Serialize + for<'a> Deserialize<'a> {}
+
 /// This exists to allow configuration over # of bits (f32 v.s. f64) as well as
 /// constraints.
 pub trait Real:
@@ -292,7 +300,7 @@ impl<T: Real> Finite<T> {
    }
    #[cfg(not(feature = "fmt"))]
    fn handle_non_finite(_inner: T) -> ! {
-        panic!("Finite<T> is not finite");
+        panic!(); // expected a finite real
    }
 }

@@ -384,7 +392,6 @@ impl<T: ToFloat> ToFloat for Finite<T> {
 impl<T: RealFeatures> RealFeatures for Finite<T> {}

 impl<T: Real> Real for Finite<T> {
-    decl_consts!(Self::from_primitive);
    fn from_primitive<P: ToFloat>(p: P) -> Self {
        Self::new(T::from_primitive(p))
    }
@@ -422,6 +429,53 @@ impl<T: Real> Real for Finite<T> {
        let (s, c) = self.0.sin_cos();
        (Self::new(s), Self::new(c))
    }
+
+    // we would ideally use `decl_consts` here, but that produces f64 -> f32 casts for R32 code.
+    fn zero() -> Self {
+        Self::from_primitive(T::zero())
+    }
+    fn one() -> Self {
+        Self::from_primitive(T::one())
+    }
+    fn two() -> Self {
+        Self::from_primitive(T::two())
+    }
+    fn three() -> Self {
+        Self::from_primitive(T::three())
+    }
+    fn ten() -> Self {
+        Self::from_primitive(T::ten())
+    }
+    fn tenth() -> Self {
+        Self::from_primitive(T::tenth())
+    }
+    fn third() -> Self {
+        Self::from_primitive(T::third())
+    }
+    fn half() -> Self {
+        Self::from_primitive(T::half())
+    }
+    fn pi() -> Self {
+        Self::from_primitive(T::pi())
+    }
+    fn two_pi() -> Self {
+        Self::from_primitive(T::two_pi())
+    }
+    fn c() -> Self {
+        Self::from_primitive(T::c())
+    }
+    fn eps0() -> Self {
+        Self::from_primitive(T::eps0())
+    }
+    fn twice_eps0() -> Self {
+        Self::from_primitive(T::twice_eps0())
+    }
+    fn mu0() -> Self {
+        Self::from_primitive(T::mu0())
+    }
+    fn mu0_inv() -> Self {
+        Self::from_primitive(T::mu0_inv())
+    }
 }

 impl ToFloat for i32 {
--- a/crates/cross/src/step.rs
+++ b/crates/cross/src/step.rs
@@ -0,0 +1,382 @@
+use core::ops::{Index, IndexMut};
+
+use crate::compound::Optional;
+use crate::dim::DimensionedSlice;
+use crate::mat::Material;
+use crate::real::Real;
+use crate::vec::{Vec3, Vec3u};
+
+#[cfg(feature = "serde")]
+use serde::{Serialize, Deserialize};
+
+#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
+#[cfg_attr(feature = "fmt", derive(Debug))]
+#[derive(Copy, Clone, Default)]
+pub struct SimMeta<R> {
+    // TODO: make these private?
+    pub dim: Vec3u,
+    pub inv_feature_size: R,
+    pub time_step: R,
+    pub feature_size: R,
+}
+
+impl<R: Copy> SimMeta<R> {
+    pub fn dim(&self) -> Vec3u {
+        self.dim
+    }
+    pub fn inv_feature_size(&self) -> R {
+        self.inv_feature_size
+    }
+    pub fn time_step(&self) -> R {
+        self.time_step
+    }
+    pub fn feature_size(&self) -> R {
+        self.feature_size
+    }
+}
+
+impl<R: Real> SimMeta<R> {
+    pub fn cast<R2: Real>(self) -> SimMeta<R2> {
+        SimMeta {
+            dim: self.dim,
+            inv_feature_size: self.inv_feature_size.cast(),
+            time_step: self.time_step.cast(),
+            feature_size: self.feature_size.cast(),
+        }
+    }
+}
+
+/// Package the field vectors adjacent to some particular location.
+/// Particular those at negative offsets from the midpoint.
+/// This is used in step_e when looking at the H field deltas.
+#[derive(Copy, Clone)]
+pub struct VolumeSampleNeg<R> {
+    pub mid: Vec3<R>,
+    pub xm1: Optional<Vec3<R>>,
+    pub ym1: Optional<Vec3<R>>,
+    pub zm1: Optional<Vec3<R>>,
+}
+
+impl<R: Copy + Default> VolumeSampleNeg<R> {
+    pub fn from_indexable<I: Index<Vec3u, Output=Vec3<R>>>(i: &I, idx: Vec3u) -> Self {
+        VolumeSampleNeg {
+            mid: i[idx],
+            xm1: prev_x(idx).map(|idx| i[idx]),
+            ym1: prev_y(idx).map(|idx| i[idx]),
+            zm1: prev_z(idx).map(|idx| i[idx]),
+        }
+    }
+}
+
+fn prev_x(idx: Vec3u) -> Optional<Vec3u> {
+    match idx.into() {
+        (0, _, _) => Optional::none(),
+        (x, y, z) => Optional::some(Vec3u::new(x-1, y, z)),
+    }
+}
+fn prev_y(idx: Vec3u) -> Optional<Vec3u> {
+    match idx.into() {
+        (_, 0, _) => Optional::none(),
+        (x, y, z) => Optional::some(Vec3u::new(x, y-1, z)),
+    }
+}
+fn prev_z(idx: Vec3u) -> Optional<Vec3u> {
+    match idx.into() {
+        (_, _, 0) => Optional::none(),
+        (x, y, z) => Optional::some(Vec3u::new(x, y, z-1)),
+    }
+}
+
+impl<R: Real> VolumeSampleNeg<R> {
+    /// Calculate the delta in H values amongst this cell and its neighbors (left/up/out)
+    fn delta_h(self) -> FieldDeltas<R> {
+        let mid = self.mid;
+        // let (dfy_dx, dfz_dx) = self.xm1.map(|xm1| {
+        //     (mid.y() - xm1.y(), mid.z() - xm1.z())
+        // }).unwrap_or_default();
+
+        // let (dfx_dy, dfz_dy) = self.ym1.map(|ym1| {
+        //     (mid.x() - ym1.x(), mid.z() - ym1.z())
+        // }).unwrap_or_default();
+
+        // let (dfx_dz, dfy_dz) = self.zm1.map(|zm1| {
+        //     (mid.x() - zm1.x(), mid.y() - zm1.y())
+        // }).unwrap_or_default();
+
+        let (dfy_dx, dfz_dx) = if self.xm1.is_some() {
+            (mid.y() - self.xm1.unwrap().y(), mid.z() - self.xm1.unwrap().z())
+        } else {
+            (R::zero(), R::zero())
+        };
+
+        let (dfx_dy, dfz_dy) = if self.ym1.is_some() {
+            (mid.x() - self.ym1.unwrap().x(), mid.z() - self.ym1.unwrap().z())
+        } else {
+            (R::zero(), R::zero())
+        };
+
+        let (dfx_dz, dfy_dz) = if self.zm1.is_some() {
+            (mid.x() - self.zm1.unwrap().x(), mid.y() - self.zm1.unwrap().y())
+        } else {
+            (R::zero(), R::zero())
+        };
+        
+        FieldDeltas {
+            dfy_dx,
+            dfz_dx,
+            dfx_dy,
+            dfz_dy,
+            dfx_dz,
+            dfy_dz,
+        }
+    }
+}
+
+
+/// Package the field vectors adjacent to some particular location.
+/// Particular those at positive offsets from the midpoint.
+/// This is used in step_h when looking at the E field deltas.
+#[derive(Copy, Clone)]
+pub struct VolumeSamplePos<R> {
+    pub mid: Vec3<R>,
+    pub xp1: Optional<Vec3<R>>,
+    pub yp1: Optional<Vec3<R>>,
+    pub zp1: Optional<Vec3<R>>
+}
+
+impl<R: Copy + Default> VolumeSamplePos<R> {
+    pub fn from_indexable<I: Index<Vec3u, Output=Vec3<R>>>(i: &I, dim: Vec3u, idx: Vec3u) -> Self {
+        VolumeSamplePos {
+            mid: i[idx],
+            xp1: next_x(dim, idx).map(|idx| i[idx]),
+            yp1: next_y(dim, idx).map(|idx| i[idx]),
+            zp1: next_z(dim, idx).map(|idx| i[idx]),
+        }
+    }
+}
+
+fn next_x(dim: Vec3u, idx: Vec3u) -> Optional<Vec3u> {
+    match idx.into() {
+        (x, y, z) if x + 1 < dim.x() => Optional::some(Vec3u::new(x+1, y, z)),
+        _ => Optional::none(),
+    }
+}
+fn next_y(dim: Vec3u, idx: Vec3u) -> Optional<Vec3u> {
+    match idx.into() {
+        (x, y, z) if y + 1 < dim.y() => Optional::some(Vec3u::new(x, y+1, z)),
+        _ => Optional::none(),
+    }
+}
+fn next_z(dim: Vec3u, idx: Vec3u) -> Optional<Vec3u> {
+    match idx.into() {
+        (x, y, z) if z + 1 < dim.z() => Optional::some(Vec3u::new(x, y, z+1)),
+        _ => Optional::none(),
+    }
+}
+
+impl<R: Real> VolumeSamplePos<R> {
+    /// Calculate the delta in E values amongst this cell and its neighbors (right/down/in)
+    fn delta_e(self) -> FieldDeltas<R> {
+        let mid = self.mid;
+        // let (dfy_dx, dfz_dx) = self.xp1.map(|xp1| {
+        //     (xp1.y() - mid.y(), xp1.z() - mid.z())
+        // }).unwrap_or_default();
+
+        // let (dfx_dy, dfz_dy) = self.yp1.map(|yp1| {
+        //     (yp1.x() - mid.x(), yp1.z() - mid.z())
+        // }).unwrap_or_default();
+
+        // let (dfx_dz, dfy_dz) = self.zp1.map(|zp1| {
+        //     (zp1.x() - mid.x(), zp1.y() - mid.y())
+        // }).unwrap_or_default();
+
+        let (dfy_dx, dfz_dx) = if self.xp1.is_some() {
+            (self.xp1.unwrap().y() - mid.y(), self.xp1.unwrap().z() - mid.z())
+        } else {
+            (R::zero(), R::zero())
+        };
+
+        let (dfx_dy, dfz_dy) = if self.yp1.is_some() {
+            (self.yp1.unwrap().x() - mid.x(), self.yp1.unwrap().z() - mid.z())
+        } else {
+            (R::zero(), R::zero())
+        };
+
+        let (dfx_dz, dfy_dz) = if self.zp1.is_some() {
+            (self.zp1.unwrap().x() - mid.x(), self.zp1.unwrap().y() - mid.y())
+        } else {
+            (R::zero(), R::zero())
+        };
+        
+        FieldDeltas {
+            dfy_dx,
+            dfz_dx,
+            dfx_dy,
+            dfz_dy,
+            dfx_dz,
+            dfy_dz,
+        }
+    }
+}
+
+struct FieldDeltas<R> {
+    dfy_dx: R,
+    dfz_dx: R,
+    dfx_dy: R,
+    dfz_dy: R,
+    dfx_dz: R,
+    dfy_dz: R,
+}
+
+impl<R: Real> FieldDeltas<R> {
+    fn nabla(self) -> Vec3<R> {
+        Vec3::new(
+            self.dfz_dy - self.dfy_dz,
+            self.dfx_dz - self.dfz_dx,
+            self.dfy_dx - self.dfx_dy,
+        )
+    }
+}
+
+pub struct StepEContext<'a, R, M> {
+    pub inv_feature_size: R,
+    pub time_step: R,
+    pub stim_e: Vec3<R>,
+    pub mat: &'a M,
+    /// Input field sampled near this location
+    pub in_h: VolumeSampleNeg<R>,
+    pub in_e: Vec3<R>,
+}
+
+impl<'a, R: Real, M: Material<R>> StepEContext<'a, R, M> {
+    pub fn step_flat_view<RM, RF, WF>(
+        meta: SimMeta<R>,
+        mat: &RM,
+        stim_e: &RF,
+        e: &mut WF,
+        h: &RF,
+        idx: Vec3u,
+    )
+    where
+        RM: Index<usize, Output=M> + ?Sized,
+        RF: Index<usize, Output=Vec3<R>> + ?Sized,
+        WF: Index<usize, Output=Vec3<R>> + IndexMut<usize> + ?Sized,
+    {
+        let dim = meta.dim;
+        let stim_e_matrix = DimensionedSlice::new(dim, stim_e);
+        let mat_matrix = DimensionedSlice::new(dim, mat);
+        let mut e_matrix = DimensionedSlice::new(dim, e);
+        let h_matrix = DimensionedSlice::new(dim, h);
+
+        let stim_e = stim_e_matrix[idx];
+        let mat = &mat_matrix[idx];
+        let in_e = e_matrix[idx];
+        let in_h = VolumeSampleNeg::from_indexable(&h_matrix, idx);
+
+        let update_state = StepEContext {
+            inv_feature_size: meta.inv_feature_size,
+            time_step: meta.time_step,
+            stim_e,
+            mat,
+            in_h,
+            in_e,
+        };
+        let new_e = update_state.step_e();
+        e_matrix[idx] = new_e;
+    }
+
+    pub fn step_e(self) -> Vec3<R> {
+        let twice_eps0 = R::twice_eps0();
+        let deltas = self.in_h.delta_h();
+        // \nabla x H
+        let nabla_h = deltas.nabla() * self.inv_feature_size;
+        // $\nabla x H = \epsilon_0 dE/dt + \sigma E$
+        // no-conductivity version:
+        //   let delta_e = nabla_h * (self.time_step * EPS0_INV);
+        let sigma = self.mat.conductivity();
+        let e_prev = self.in_e;
+        let delta_e = (nabla_h - e_prev.elem_mul(sigma)).elem_div(
+            sigma*self.time_step + Vec3::uniform(twice_eps0)
+        )*(R::two()*self.time_step);
+        // println!("spirv-step_e delta_e: {:?}", delta_e);
+        e_prev + delta_e + self.stim_e
+    }
+}
+
+
+pub struct StepHContext<'a, R, M> {
+    pub inv_feature_size: R,
+    pub time_step: R,
+    pub stim_h: Vec3<R>,
+    pub mat: &'a M,
+    /// Input field sampled near this location
+    pub in_e: VolumeSamplePos<R>,
+    pub in_h: Vec3<R>,
+    pub in_m: Vec3<R>,
+}
+
+impl<'a, R: Real, M: Material<R>> StepHContext<'a, R, M> {
+    pub fn step_flat_view<RM, RF, WF>(
+        meta: SimMeta<R>,
+        mat: &RM,
+        stim_h: &RF,
+        e: &RF,
+        h: &mut WF,
+        m: &mut WF,
+        idx: Vec3u,
+    )
+    where
+        RM: Index<usize, Output=M> + ?Sized,
+        RF: Index<usize, Output=Vec3<R>> + ?Sized,
+        WF: Index<usize, Output=Vec3<R>> + IndexMut<usize> + ?Sized,
+    {
+        let dim = meta.dim;
+        let stim_h_matrix = DimensionedSlice::new(dim, stim_h);
+        let mat_matrix = DimensionedSlice::new(dim, mat);
+        let e_matrix = DimensionedSlice::new(dim, e);
+        let mut h_matrix = DimensionedSlice::new(dim, h);
+        let mut m_matrix = DimensionedSlice::new(dim, m);
+
+        let stim_h = stim_h_matrix[idx];
+        let mat = &mat_matrix[idx];
+        let in_e = VolumeSamplePos::from_indexable(&e_matrix, dim, idx);
+        let in_h = h_matrix[idx];
+        let in_m = m_matrix[idx];
+
+        let update_state = StepHContext {
+            inv_feature_size: meta.inv_feature_size,
+            time_step: meta.time_step,
+            stim_h,
+            mat,
+            in_e,
+            in_h,
+            in_m,
+        };
+        let (new_h, new_m) = update_state.step_h();
+        h_matrix[idx] = new_h;
+        m_matrix[idx] = new_m;
+    }
+
+    pub fn step_h(self) -> (Vec3<R>, Vec3<R>) {
+        let mu0 = R::mu0();
+        let mu0_inv = R::mu0_inv();
+        let deltas = self.in_e.delta_e();
+        // println!("spirv-step_h delta_e_struct: {:?}", deltas);
+        // \nabla x E
+        let nabla_e = deltas.nabla() * self.inv_feature_size;
+        // println!("spirv-step_h nabla_e: {:?}", nabla_e);
+        let delta_b = nabla_e * (-self.time_step);
+
+        // Relation between these is: B = mu0*(H + M)
+        let old_h = self.in_h;
+        let old_m = self.in_m;
+        let old_b = (old_h + old_m) * mu0;
+
+        let new_b = old_b + delta_b + self.stim_h * mu0;
+        let mat = self.mat;
+        let new_m = mat.move_b_vec(old_m, new_b);
+        let new_h = new_b * mu0_inv - new_m;
+        // println!("spirv-step_h delta_h: {:?}", delta_h);
+        (new_h, new_m)
+    }
+}
--- a/crates/cross/src/vec.rs
+++ b/crates/cross/src/vec.rs
@@ -124,13 +124,17 @@ impl<R: Real> Vec2<R> {
        self.mag_sq().sqrt()
    }

-    pub fn with_mag(&self, new_mag: R) -> Self {
+    pub fn with_mag(&self, new_mag: R) -> Option<Self> {
        if new_mag.is_zero() {
            // avoid div-by-zero if self.mag() == 0 and new_mag == 0
-            Vec2::new(R::zero(), R::zero())
+            Some(Self::zero())
        } else {
-            let scale = new_mag / self.mag();
-            *self * scale
+            let old_mag = self.mag();
+            if old_mag.is_zero() {
+                None
+            } else {
+                Some(*self * (new_mag / old_mag))
+            }
        }
    }

@@ -329,13 +333,17 @@ impl<R: Real> Vec3<R> {
        Self::new(self.x().exp(), self.y().exp(), self.z().exp())
    }

-    pub fn with_mag(&self, new_mag: R) -> Self {
+    pub fn with_mag(&self, new_mag: R) -> Option<Self> {
        if new_mag.is_zero() {
            // avoid div-by-zero if self.mag() == 0 and new_mag == 0
-            Self::zero()
+            Some(Self::zero())
        } else {
-            let scale = new_mag / self.mag();
-            *self * scale
+            let old_mag = self.mag();
+            if old_mag.is_zero() {
+                None
+            } else {
+                Some(*self * (new_mag / old_mag))
+            }
        }
    }

@@ -344,7 +352,7 @@ impl<R: Real> Vec3<R> {
        if *self == Self::zero() {
            *self
        } else {
-            self.with_mag(R::one())
+            self.with_mag(R::one()).unwrap()
        }
    }
    pub fn round(&self) -> Self {
--- a/crates/cross/src/vecu.rs
+++ b/crates/cross/src/vecu.rs
@@ -51,6 +51,10 @@ impl Vec3u {
    pub fn product_sum(&self) -> u32 {
        self.x * self.y * self.z
    }
+
+    pub fn product_sum_usize(&self) -> usize {
+        self.x as usize * self.y as usize * self.z as usize
+    }
 }

 impl From<(u32, u32, u32)> for Vec3u {
@@ -59,6 +63,12 @@ impl From<(u32, u32, u32)> for Vec3u {
    }
 }

+impl Into<(u32, u32, u32)> for Vec3u {
+    fn into(self) -> (u32, u32, u32) {
+        (self.x, self.y, self.z)
+    }
+}
+
 impl<R: Real> From<Vec3<R>> for Vec3u {
    fn from(v: Vec3<R>) -> Self {
        Self::new(v.x().to_f64() as _, v.y().to_f64() as _, v.z().to_f64() as _)
--- a/crates/post/src/bin/csv.rs
+++ b/crates/post/src/bin/csv.rs
@@ -15,17 +15,13 @@ fn main() {
    
    let mut frame = cache.load_first();
    for meas in frame.measurements() {
-        for key in meas.key_value(&**frame).keys() {
-            print!("\"{}\",", key);
-        }
+        print!("\"{}\",", meas.key());
    }
    println!("");

    loop {
        for meas in frame.measurements() {
-            for value in meas.key_value(&**frame).values() {
-                print!("\"{}\",", value);
-            }
+            print!("\"{}\",", meas.value());
        }
        println!("");

--- a/crates/post/src/bin/viewer.rs
+++ b/crates/post/src/bin/viewer.rs
@@ -1,8 +1,11 @@
 use coremem_post::{Loader, Viewer};
 use std::path::PathBuf;
+use std::io::Write as _;
 use std::time::Duration;
 use structopt::StructOpt;
+use crossterm::{cursor, QueueableCommand as _};
 use crossterm::event::{Event, KeyCode};
+use crossterm::style::{style, PrintStyledContent};

 #[derive(Debug, StructOpt)]
 struct Opt {
@@ -33,6 +36,12 @@ fn event_loop(mut viewer: Viewer) {
            }
        }
        viewer.navigate(time_steps, z_steps);
+
+        let mut stdout = std::io::stdout();
+        stdout.queue(cursor::MoveToColumn(30)).unwrap();
+        stdout.queue(PrintStyledContent(style("wasd=appearance; arrows,PgUp/PgDown=navigate; q=quit"))).unwrap();
+        stdout.flush().unwrap();
+
        let _ = crossterm::event::poll(Duration::from_millis(33)).unwrap();
    }
 }
--- a/crates/post/src/lib.rs
+++ b/crates/post/src/lib.rs
@@ -1,16 +1,14 @@
 //! Post-processing tools
-use coremem::meas::AbstractMeasurement;
+use coremem::meas;
 use coremem::render::{ColorTermRenderer, Renderer as _, RenderConfig, SerializedFrame};
-use coremem::sim::StaticSim;
-use coremem::sim::legacy::SimState;
+use coremem::sim::{AbstractSim, GenericSim};

 use itertools::Itertools as _;
 use lru::LruCache;
 use rayon::{ThreadPool, ThreadPoolBuilder};
 use std::collections::HashSet;
 use std::fs::{DirEntry, File, read_dir};
-use std::io::{BufReader, Seek as _, SeekFrom};
-use std::ops::Deref;
+use std::io::BufReader;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use std::sync::mpsc::{self, Receiver, Sender};
@@ -34,25 +32,21 @@ pub type Result<T> = std::result::Result<T, Error>;

 pub struct Frame {
    path: PathBuf,
-    data: SerializedFrame<StaticSim>,
+    data: SerializedFrame<GenericSim<f32>>,
 }

 impl Frame {
-    pub fn measurements(&self) -> &[Box<dyn AbstractMeasurement>] {
+    pub fn measurements(&self) -> &[meas::Evaluated] {
        &*self.data.measurements
    }
+    pub fn sim(&self) -> &GenericSim<f32> {
+        &self.data.state
+    }
    pub fn path(&self) -> &Path {
        &*self.path
    }
 }

-impl Deref for Frame {
-    type Target = StaticSim;
-    fn deref(&self) -> &Self::Target {
-        &self.data.state
-    }
-}
-
 #[derive(Default)]
 pub struct Loader {
    dir: PathBuf,
@@ -106,20 +100,19 @@ impl Loader {

    fn load(&self, path: &Path) -> Result<Frame> {
        let mut reader = BufReader::new(File::open(path).unwrap());
-        // Try to deserialize a couple different types of likely sims.
-        // TODO: would be good to drop a marker in the file to make sure we don't
-        // decode to a valid but incorrect state...
-        let data = bincode::deserialize_from(&mut reader).or_else(|_| -> Result<_> {
-            reader.seek(SeekFrom::Start(0)).unwrap();
-            let data: SerializedFrame<SimState<f32>> =
-                bincode::deserialize_from(&mut reader)?;
-            Ok(SerializedFrame::to_static(data))
-        }).or_else(|_| -> Result<_> {
-            reader.seek(SeekFrom::Start(0)).unwrap();
-            let data: SerializedFrame<SimState<f64>> =
-                bincode::deserialize_from(reader)?;
-            Ok(SerializedFrame::to_static(data))
-        })?;
+        // let data = bincode::deserialize_from(&mut reader).or_else(|_| -> Result<_> {
+        //     reader.seek(SeekFrom::Start(0)).unwrap();
+        //     let data: SerializedFrame<SimState<f32>> =
+        //         bincode::deserialize_from(&mut reader)?;
+        //     Ok(SerializedFrame::to_static(data))
+        // }).or_else(|_| -> Result<_> {
+        //     reader.seek(SeekFrom::Start(0)).unwrap();
+        //     let data: SerializedFrame<SimState<f64>> =
+        //         bincode::deserialize_from(reader)?;
+        //     Ok(SerializedFrame::to_static(data))
+        // })?;
+        // TODO: try to decode a few common sim types (as above) if this fails?
+        let data = bincode::deserialize_from(&mut reader)?;
        Ok(Frame {
            path: path.into(),
            data
@@ -254,7 +247,7 @@ impl Viewer {
        let mut cache = LoaderCache::new(loader, 6, 6);
        let viewing = cache.load_first();
        Self {
-            z: viewing.depth() / 2,
+            z: viewing.sim().depth() / 2,
            viewing,
            cache,
            renderer: Default::default(),
@@ -264,7 +257,7 @@ impl Viewer {
    }
    pub fn navigate(&mut self, time_steps: isize, z_steps: i32) {
        let new_z = (self.z as i32).saturating_add(z_steps);
-        let new_z = new_z.max(0).min(self.viewing.depth() as i32 - 1) as u32;
+        let new_z = new_z.max(0).min(self.viewing.sim().depth() as i32 - 1) as u32;
        if time_steps == 0 && new_z == self.z && self.render_config == self.last_config {
            return;
        }
@@ -276,7 +269,10 @@ impl Viewer {
    }
    pub fn render(&self) {
        self.renderer.render_z_slice(
-            &**self.viewing, self.z, &self.viewing.data.measurements, self.render_config
+            self.viewing.sim(),
+            self.z,
+            &*meas::as_dyn_measurements(self.viewing.measurements()),
+            self.render_config,
        );
    }
    pub fn render_config(&mut self) -> &mut RenderConfig {
--- a/crates/spirv_backend/Cargo.toml
+++ b/crates/spirv_backend/Cargo.toml
@@ -8,4 +8,4 @@ crate-type = ["dylib", "lib"]

 [dependencies]
 spirv-std = { git = "https://github.com/EmbarkStudios/rust-gpu", features = ["glam"] }  # MIT or Apache 2.0
-coremem_types = { path = "../types" }
+coremem_cross = { path = "../cross" }
--- a/crates/spirv_backend/src/adapt.rs
+++ b/crates/spirv_backend/src/adapt.rs
@@ -0,0 +1,53 @@
+use spirv_std::RuntimeArray;
+
+use coremem_cross::mat::Material;
+use coremem_cross::real::Real;
+use coremem_cross::step::{SimMeta, StepEContext, StepHContext};
+use coremem_cross::vec::{Vec3, Vec3u};
+
+use crate::support::SizedArray;
+
+pub(crate) fn step_h<R: Real, M: Material<R>>(
+    idx: Vec3u,
+    meta: &SimMeta<R>,
+    stimulus_h: &RuntimeArray<Vec3<R>>,
+    material: &RuntimeArray<M>,
+    e: &RuntimeArray<Vec3<R>>,
+    h: &mut RuntimeArray<Vec3<R>>,
+    m: &mut RuntimeArray<Vec3<R>>,
+) {
+    if idx.x() < meta.dim.x() && idx.y() < meta.dim.y() && idx.z() < meta.dim.z() {
+        let dim = meta.dim;
+        let len = dim.product_sum_usize();
+
+        let stim_h_array = unsafe { SizedArray::new(stimulus_h, len) };
+        let mat_array = unsafe { SizedArray::new(material, len) };
+        let e_array = unsafe { SizedArray::new(e, len) };
+        let mut h_array = unsafe { SizedArray::new(h, len) };
+        let mut m_array = unsafe { SizedArray::new(m, len) };
+
+        StepHContext::step_flat_view(*meta, &mat_array, &stim_h_array, &e_array, &mut h_array, &mut m_array, idx);
+    }
+}
+
+pub(crate) fn step_e<R: Real, M: Material<R>>(
+    idx: Vec3u,
+    meta: &SimMeta<R>,
+    stimulus_e: &RuntimeArray<Vec3<R>>,
+    material: &RuntimeArray<M>,
+    e: &mut RuntimeArray<Vec3<R>>,
+    h: &RuntimeArray<Vec3<R>>,
+) {
+    if idx.x() < meta.dim.x() && idx.y() < meta.dim.y() && idx.z() < meta.dim.z() {
+        let dim = meta.dim;
+        let len = dim.product_sum_usize();
+
+        let stim_e_array = unsafe { SizedArray::new(stimulus_e, len) };
+        let mat_array = unsafe { SizedArray::new(material, len) };
+        let mut e_array = unsafe { SizedArray::new(e, len) };
+        let h_array = unsafe { SizedArray::new(h, len) };
+
+        StepEContext::step_flat_view(*meta, &mat_array, &stim_e_array, &mut e_array, &h_array, idx);
+    }
+}
+
--- a/crates/spirv_backend/src/lib.rs
+++ b/crates/spirv_backend/src/lib.rs
@@ -8,19 +8,17 @@

 extern crate spirv_std;

-pub use spirv_std::glam;
+use spirv_std::{glam, RuntimeArray};
 #[cfg(not(target_arch = "spirv"))]
 use spirv_std::macros::spirv;

-pub mod sim;
-pub mod support;
+mod adapt;
+mod support;

-pub use sim::{SerializedSimMeta, SerializedStepE, SerializedStepH};
-pub use support::{Optional, UnsizedArray};
-
-use coremem_types::mat::{Ferroxcube3R1MH, FullyGenericMaterial, IsoConductorOr, Material};
-use coremem_types::real::Real;
-use coremem_types::vec::{Vec3, Vec3u};
+use coremem_cross::mat::{Ferroxcube3R1MH, FullyGenericMaterial, IsoConductorOr};
+use coremem_cross::real::R32;
+use coremem_cross::step::SimMeta;
+use coremem_cross::vec::{Vec3, Vec3u};

 type Iso3R1<R> = IsoConductorOr<R, Ferroxcube3R1MH>;

@@ -28,93 +26,61 @@ fn glam_vec_to_internal(v: glam::UVec3) -> Vec3u {
    Vec3u::new(v.x, v.y, v.z)
 }

-fn step_h<R: Real, M: Material<R>>(
-    id: Vec3u,
-    meta: &SerializedSimMeta<R>,
-    stimulus_h: &UnsizedArray<Vec3<R>>,
-    material: &UnsizedArray<M>,
-    e: &UnsizedArray<Vec3<R>>,
-    h: &mut UnsizedArray<Vec3<R>>,
-    m: &mut UnsizedArray<Vec3<R>>,
-) {
-    if id.x() < meta.dim.x() && id.y() < meta.dim.y() && id.z() < meta.dim.z() {
-        let sim_state = SerializedStepH::new(meta, stimulus_h, material, e, h, m);
-        let update_state = sim_state.index(id);
-        update_state.step_h();
-    }
+mod private {
+    pub trait Sealed {}
 }

-fn step_e<R: Real, M: Material<R>>(
-    id: Vec3u,
-    meta: &SerializedSimMeta<R>,
-    stimulus_e: &UnsizedArray<Vec3<R>>,
-    material: &UnsizedArray<M>,
-    e: &mut UnsizedArray<Vec3<R>>,
-    h: &UnsizedArray<Vec3<R>>,
-) {
-    if id.x() < meta.dim.x() && id.y() < meta.dim.y() && id.z() < meta.dim.z() {
-        let sim_state = SerializedStepE::new(meta, stimulus_e, material, e, h);
-
-        let update_state = sim_state.index(id);
-        update_state.step_e();
-    }
-}
-
-/// Return the step_h/step_e entry point names for the provided material
-pub fn entry_points<M: 'static>() -> Optional<(&'static str, &'static str)> {
-    use core::any::TypeId;
-    let mappings = [
-        (TypeId::of::<FullyGenericMaterial<f32>>(),
-            ("step_h_generic_material", "step_e_generic_material")
-        ),
-        (TypeId::of::<Iso3R1<f32>>(),
-            ("step_h_iso_3r1", "step_e_iso_3r1")
-        ),
-    ];
-
-    for (id, names) in mappings {
-        if id == TypeId::of::<M>() {
-            return Optional::some(names);
-        }
-    }
-    Optional::none()
+pub trait HasEntryPoints<R>: private::Sealed {
+    fn step_h() -> &'static str;
+    fn step_e() -> &'static str;
 }

 macro_rules! steps {
    ($flt:ty, $mat:ty, $step_h:ident, $step_e:ident) => {
+        impl private::Sealed for $mat { }
+        impl HasEntryPoints<$flt> for $mat {
+            fn step_h() -> &'static str {
+                stringify!($step_h)
+            }
+            fn step_e() -> &'static str {
+                stringify!($step_e)
+            }
+        }
        // LocalSize/numthreads
        #[spirv(compute(threads(4, 4, 4)))]
        pub fn $step_h(
            #[spirv(global_invocation_id)] id: glam::UVec3,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 0)] meta: &SerializedSimMeta<$flt>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 0)] meta: &SimMeta<$flt>,
            // XXX: delete this input?
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 1)] _unused_stimulus_e: &UnsizedArray<Vec3<$flt>>,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 2)] stimulus_h: &UnsizedArray<Vec3<$flt>>,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 3)] material: &UnsizedArray<$mat>,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 4)] e: &UnsizedArray<Vec3<$flt>>,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 5)] h: &mut UnsizedArray<Vec3<$flt>>,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 6)] m: &mut UnsizedArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 1)] _unused_stimulus_e: &RuntimeArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 2)] stimulus_h: &RuntimeArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 3)] material: &RuntimeArray<$mat>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 4)] e: &RuntimeArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 5)] h: &mut RuntimeArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 6)] m: &mut RuntimeArray<Vec3<$flt>>,
        ) {
-            step_h(glam_vec_to_internal(id), meta, stimulus_h, material, e, h, m)
+            adapt::step_h(glam_vec_to_internal(id), meta, stimulus_h, material, e, h, m)
        }

        #[spirv(compute(threads(4, 4, 4)))]
        pub fn $step_e(
            #[spirv(global_invocation_id)] id: glam::UVec3,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 0)] meta: &SerializedSimMeta<$flt>,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 1)] stimulus_e: &UnsizedArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 0)] meta: &SimMeta<$flt>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 1)] stimulus_e: &RuntimeArray<Vec3<$flt>>,
            // XXX: delete this input?
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 2)] _unused_stimulus_h: &UnsizedArray<Vec3<$flt>>,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 3)] material: &UnsizedArray<$mat>,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 4)] e: &mut UnsizedArray<Vec3<$flt>>,
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 5)] h: &UnsizedArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 2)] _unused_stimulus_h: &RuntimeArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 3)] material: &RuntimeArray<$mat>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 4)] e: &mut RuntimeArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 5)] h: &RuntimeArray<Vec3<$flt>>,
            // XXX: can/should this m input be deleted?
-            #[spirv(storage_buffer, descriptor_set = 0, binding = 6)] _unused_m: &UnsizedArray<Vec3<$flt>>,
+            #[spirv(storage_buffer, descriptor_set = 0, binding = 6)] _unused_m: &RuntimeArray<Vec3<$flt>>,
        ) {
-            step_e(glam_vec_to_internal(id), meta, stimulus_e, material, e, h)
+            adapt::step_e(glam_vec_to_internal(id), meta, stimulus_e, material, e, h)
        }
    };
 }

-steps!(f32, FullyGenericMaterial<f32>, step_h_generic_material, step_e_generic_material);
-steps!(f32, Iso3R1<f32>, step_h_iso_3r1, step_e_iso_3r1);
+steps!(f32, FullyGenericMaterial<f32>, step_h_generic_material_f32, step_e_generic_material_f32);
+steps!(f32, Iso3R1<f32>, step_h_iso_3r1_f32, step_e_iso_3r1_f32);
+steps!(R32, FullyGenericMaterial<R32>, step_h_generic_material_r32, step_e_generic_material_r32);
+steps!(R32, Iso3R1<R32>, step_h_iso_3r1_r32, step_e_iso_3r1_r32);
--- a/crates/spirv_backend/src/sim.rs
+++ b/crates/spirv_backend/src/sim.rs
@@ -1,342 +0,0 @@
-// use spirv_std::RuntimeArray;
-use crate::support::{
-    Array3, Array3Mut, ArrayHandle, ArrayHandleMut, Optional, UnsizedArray
-};
-use coremem_types::mat::Material;
-use coremem_types::real::Real;
-use coremem_types::vec::{Vec3, Vec3u};
-
-#[derive(Copy, Clone)]
-pub struct SerializedSimMeta<R> {
-    pub dim: Vec3u,
-    pub inv_feature_size: R,
-    pub time_step: R,
-    pub feature_size: R,
-}
-
-
-/// Whatever data we received from the host in their call to step_h
-pub struct SerializedStepH<'a, R, M> {
-    meta: &'a SerializedSimMeta<R>,
-    stimulus_h: &'a UnsizedArray<Vec3<R>>,
-    material: &'a UnsizedArray<M>,
-    e: &'a UnsizedArray<Vec3<R>>,
-    h: &'a mut UnsizedArray<Vec3<R>>,
-    m: &'a mut UnsizedArray<Vec3<R>>,
-}
-
-impl<'a, R, M> SerializedStepH<'a, R, M> {
-    pub fn new(
-        meta: &'a SerializedSimMeta<R>,
-        stimulus_h: &'a UnsizedArray<Vec3<R>>,
-        material: &'a UnsizedArray<M>,
-        e: &'a UnsizedArray<Vec3<R>>,
-        h: &'a mut UnsizedArray<Vec3<R>>,
-        m: &'a mut UnsizedArray<Vec3<R>>,
-    ) -> Self {
-        Self {
-            meta, stimulus_h, material, e, h, m
-        }
-    }
-}
-
-impl<'a, R: Real, M> SerializedStepH<'a, R, M> {
-    pub fn index(self, idx: Vec3u) -> StepHContext<'a, R, M> {
-        let dim = self.meta.dim;
-        let stim_h_matrix = Array3::new(self.stimulus_h, dim);
-        let mat_matrix = Array3::new(self.material, dim);
-        let e = Array3::new(self.e, dim);
-        let h = Array3Mut::new(self.h, dim);
-        let m = Array3Mut::new(self.m, dim);
-
-        let in_e = VolumeSamplePos {
-            mid: e.get(idx).unwrap(),
-            xp1: e.get(idx + Vec3u::unit_x()),
-            yp1: e.get(idx + Vec3u::unit_y()),
-            zp1: e.get(idx + Vec3u::unit_z()),
-        };
-        let out_h = h.into_mut_handle(idx);
-        let out_m = m.into_mut_handle(idx);
-
-        let mat = mat_matrix.into_handle(idx);
-
-        StepHContext {
-            inv_feature_size: self.meta.inv_feature_size,
-            time_step: self.meta.time_step,
-            stim_h: stim_h_matrix.get(idx).unwrap(),
-            mat,
-            in_e,
-            out_h,
-            out_m,
-        }
-    }
-}
-
-/// Whatever data we received from the host in their call to step_e
-pub struct SerializedStepE<'a, R, M> {
-    meta: &'a SerializedSimMeta<R>,
-    stimulus_e: &'a UnsizedArray<Vec3<R>>,
-    material: &'a UnsizedArray<M>,
-    e: &'a mut UnsizedArray<Vec3<R>>,
-    h: &'a UnsizedArray<Vec3<R>>,
-}
-
-impl<'a, R, M> SerializedStepE<'a, R, M> {
-    pub fn new(
-        meta: &'a SerializedSimMeta<R>,
-        stimulus_e: &'a UnsizedArray<Vec3<R>>,
-        material: &'a UnsizedArray<M>,
-        e: &'a mut UnsizedArray<Vec3<R>>,
-        h: &'a UnsizedArray<Vec3<R>>,
-    ) -> Self {
-        Self {
-            meta, stimulus_e, material, e, h
-        }
-    }
-}
-
-impl<'a, R: Real, M> SerializedStepE<'a, R, M> {
-    pub fn index(self, idx: Vec3u) -> StepEContext<'a, R, M> {
-        let dim = self.meta.dim;
-        let stim_e_matrix = Array3::new(self.stimulus_e, dim);
-        let mat_matrix = Array3::new(self.material, dim);
-        let e = Array3Mut::new(self.e, dim);
-        let h = Array3::new(self.h, dim);
-
-        let xm1 = if idx.x() == 0 {
-            Optional::none()
-        } else {
-            h.get(idx - Vec3u::unit_x())
-        };
-        let ym1 = if idx.y() == 0 {
-            Optional::none()
-        } else {
-            h.get(idx - Vec3u::unit_y())
-        };
-        let zm1 = if idx.z() == 0 {
-            Optional::none()
-        } else {
-            h.get(idx - Vec3u::unit_z())
-        };
-
-        let in_h = VolumeSampleNeg {
-            mid: h.get(idx).unwrap(),
-            xm1,
-            ym1,
-            zm1,
-        };
-        let out_e = e.into_mut_handle(idx);
-
-        let mat = mat_matrix.into_handle(idx);
-
-        StepEContext {
-            inv_feature_size: self.meta.inv_feature_size,
-            time_step: self.meta.time_step,
-            stim_e: stim_e_matrix.get(idx).unwrap(),
-            mat,
-            in_h,
-            out_e,
-        }
-    }
-}
-
-/// Package the field vectors adjacent to some particular location.
-/// Particular those at negative offsets from the midpoint.
-/// This is used in step_e when looking at the H field deltas.
-#[derive(Copy, Clone)]
-struct VolumeSampleNeg<R> {
-    mid: Vec3<R>,
-    xm1: Optional<Vec3<R>>,
-    ym1: Optional<Vec3<R>>,
-    zm1: Optional<Vec3<R>>,
-}
-
-impl<R: Real> VolumeSampleNeg<R> {
-    /// Calculate the delta in H values amongst this cell and its neighbors (left/up/out)
-    fn delta_h(self) -> FieldDeltas<R> {
-        let mid = self.mid;
-        // let (dfy_dx, dfz_dx) = self.xm1.map(|xm1| {
-        //     (mid.y() - xm1.y(), mid.z() - xm1.z())
-        // }).unwrap_or_default();
-
-        // let (dfx_dy, dfz_dy) = self.ym1.map(|ym1| {
-        //     (mid.x() - ym1.x(), mid.z() - ym1.z())
-        // }).unwrap_or_default();
-
-        // let (dfx_dz, dfy_dz) = self.zm1.map(|zm1| {
-        //     (mid.x() - zm1.x(), mid.y() - zm1.y())
-        // }).unwrap_or_default();
-
-        let (dfy_dx, dfz_dx) = if self.xm1.is_some() {
-            (mid.y() - self.xm1.unwrap().y(), mid.z() - self.xm1.unwrap().z())
-        } else {
-            (R::zero(), R::zero())
-        };
-
-        let (dfx_dy, dfz_dy) = if self.ym1.is_some() {
-            (mid.x() - self.ym1.unwrap().x(), mid.z() - self.ym1.unwrap().z())
-        } else {
-            (R::zero(), R::zero())
-        };
-
-        let (dfx_dz, dfy_dz) = if self.zm1.is_some() {
-            (mid.x() - self.zm1.unwrap().x(), mid.y() - self.zm1.unwrap().y())
-        } else {
-            (R::zero(), R::zero())
-        };
-        
-        FieldDeltas {
-            dfy_dx,
-            dfz_dx,
-            dfx_dy,
-            dfz_dy,
-            dfx_dz,
-            dfy_dz,
-        }
-    }
-}
-
-
-/// Package the field vectors adjacent to some particular location.
-/// Particular those at positive offsets from the midpoint.
-/// This is used in step_h when looking at the E field deltas.
-#[derive(Copy, Clone)]
-struct VolumeSamplePos<R> {
-    mid: Vec3<R>,
-    xp1: Optional<Vec3<R>>,
-    yp1: Optional<Vec3<R>>,
-    zp1: Optional<Vec3<R>>
-}
-
-impl<R: Real> VolumeSamplePos<R> {
-    /// Calculate the delta in E values amongst this cell and its neighbors (right/down/in)
-    fn delta_e(self) -> FieldDeltas<R> {
-        let mid = self.mid;
-        // let (dfy_dx, dfz_dx) = self.xp1.map(|xp1| {
-        //     (xp1.y() - mid.y(), xp1.z() - mid.z())
-        // }).unwrap_or_default();
-
-        // let (dfx_dy, dfz_dy) = self.yp1.map(|yp1| {
-        //     (yp1.x() - mid.x(), yp1.z() - mid.z())
-        // }).unwrap_or_default();
-
-        // let (dfx_dz, dfy_dz) = self.zp1.map(|zp1| {
-        //     (zp1.x() - mid.x(), zp1.y() - mid.y())
-        // }).unwrap_or_default();
-
-        let (dfy_dx, dfz_dx) = if self.xp1.is_some() {
-            (self.xp1.unwrap().y() - mid.y(), self.xp1.unwrap().z() - mid.z())
-        } else {
-            (R::zero(), R::zero())
-        };
-
-        let (dfx_dy, dfz_dy) = if self.yp1.is_some() {
-            (self.yp1.unwrap().x() - mid.x(), self.yp1.unwrap().z() - mid.z())
-        } else {
-            (R::zero(), R::zero())
-        };
-
-        let (dfx_dz, dfy_dz) = if self.zp1.is_some() {
-            (self.zp1.unwrap().x() - mid.x(), self.zp1.unwrap().y() - mid.y())
-        } else {
-            (R::zero(), R::zero())
-        };
-        
-        FieldDeltas {
-            dfy_dx,
-            dfz_dx,
-            dfx_dy,
-            dfz_dy,
-            dfx_dz,
-            dfy_dz,
-        }
-    }
-}
-
-struct FieldDeltas<R> {
-    dfy_dx: R,
-    dfz_dx: R,
-    dfx_dy: R,
-    dfz_dy: R,
-    dfx_dz: R,
-    dfy_dz: R,
-}
-
-impl<R: Real> FieldDeltas<R> {
-    fn nabla(self) -> Vec3<R> {
-        Vec3::new(
-            self.dfz_dy - self.dfy_dz,
-            self.dfx_dz - self.dfz_dx,
-            self.dfy_dx - self.dfx_dy,
-        )
-    }
-}
-
-pub struct StepEContext<'a, R, M> {
-    inv_feature_size: R,
-    time_step: R,
-    stim_e: Vec3<R>,
-    mat: ArrayHandle<'a, M>,
-    /// Input field sampled near this location
-    in_h: VolumeSampleNeg<R>,
-    /// Handle to the output field at one specific index.
-    out_e: ArrayHandleMut<'a, Vec3<R>>,
-}
-
-impl<'a, R: Real, M: Material<R>> StepEContext<'a, R, M> {
-    pub fn step_e(mut self) {
-        let twice_eps0 = R::twice_eps0();
-        let deltas = self.in_h.delta_h();
-        // \nabla x H
-        let nabla_h = deltas.nabla() * self.inv_feature_size;
-        // $\nabla x H = \epsilon_0 dE/dt + \sigma E$
-        // no-conductivity version:
-        //   let delta_e = nabla_h * (self.time_step * EPS0_INV);
-        let sigma = self.mat.get_ref().conductivity();
-        let e_prev = self.out_e.get();
-        let delta_e = (nabla_h - e_prev.elem_mul(sigma)).elem_div(
-            sigma*self.time_step + Vec3::uniform(twice_eps0)
-        )*(R::two()*self.time_step);
-        // println!("spirv-step_e delta_e: {:?}", delta_e);
-        self.out_e.write(e_prev + delta_e + self.stim_e);
-    }
-}
-
-
-pub struct StepHContext<'a, R, M> {
-    inv_feature_size: R,
-    time_step: R,
-    stim_h: Vec3<R>,
-    mat: ArrayHandle<'a, M>,
-    /// Input field sampled near this location
-    in_e: VolumeSamplePos<R>,
-    /// Handle to the output field at one specific index.
-    out_h: ArrayHandleMut<'a, Vec3<R>>,
-    out_m: ArrayHandleMut<'a, Vec3<R>>,
-}
-
-impl<'a, R: Real, M: Material<R>> StepHContext<'a, R, M> {
-    pub fn step_h(mut self) {
-        let mu0 = R::mu0();
-        let mu0_inv = R::mu0_inv();
-        let deltas = self.in_e.delta_e();
-        // println!("spirv-step_h delta_e_struct: {:?}", deltas);
-        // \nabla x E
-        let nabla_e = deltas.nabla() * self.inv_feature_size;
-        // println!("spirv-step_h nabla_e: {:?}", nabla_e);
-        let delta_b = nabla_e * (-self.time_step);
-
-        // Relation between these is: B = mu0*(H + M)
-        let old_h = self.out_h.get();
-        let old_m = self.out_m.get();
-        let old_b = (old_h + old_m) * mu0;
-
-        let new_b = old_b + delta_b + self.stim_h * mu0;
-        let mat = self.mat.get_ref();
-        let new_m = mat.move_b_vec(old_m, new_b);
-        let new_h = new_b * mu0_inv - new_m;
-        // println!("spirv-step_h delta_h: {:?}", delta_h);
-        self.out_h.write(new_h);
-        self.out_m.write(new_m);
-    }
-}
--- a/crates/spirv_backend/src/support.rs
+++ b/crates/spirv_backend/src/support.rs
@@ -1,284 +1,51 @@
-use coremem_types::vec::Vec3u;
+use core::ops::{Index, IndexMut};

-/// This is a spirv-compatible option type.
-/// The native rust Option type produces invalid spirv due to its enum nature; this custom option
-/// type creates code which will actually compile.
-#[derive(Copy, Clone, PartialEq)]
-pub struct Optional<T> {
-    // XXX: not a bool, because: "entrypoint parameter cannot contain a boolean"
-    present: u8,
-    data: T,
+use spirv_std::RuntimeArray;
+
+/// this is intended to wrap an unsized array with a length and provide safe indexing operations
+/// into it. it behaves quite similar to a native rust slice, and ideally we'd just use that but
+/// spirv support for slices is poor.
+pub struct SizedArray<T> {
+    items: T,
+    len: usize,
 }

-impl<T> Optional<T> {
-    pub fn some(data: T) -> Self {
-        Self {
-            present: 1,
-            data,
-        }
-    }
-
-    pub fn explicit_none(data: T) -> Self {
-        Self {
-            present: 0,
-            data,
-        }
-    }
-
-    pub fn is_some(self) -> bool {
-        self.present != 0
-    }
-
-    pub fn unwrap(self) -> T {
-        assert!(self.present != 0);
-        self.data
-    }
-
-    pub fn map<U: Default, F: FnOnce(T) -> U>(self, f: F) -> Optional<U> {
-        self.and_then(|inner| Optional::some(f(inner)))
-    }
-
-    pub fn and_then<U: Default, F: FnOnce(T) -> Optional<U>>(self, f: F) -> Optional<U> {
-        if self.present != 0 {
-            f(self.unwrap())
-        } else {
-            Optional::none()
-        }
-    }
-
-    pub fn unwrap_or(self, default: T) -> T {
-        if self.present != 0 {
-            self.unwrap()
-        } else {
-            default
-        }
+impl<T> SizedArray<T> {
+    /// construct the slice from some other collection and the number of items in said collection.
+    /// safety: caller must validate that it's safe to index all `len` elements in the underlying
+    /// collection.
+    pub unsafe fn new(items: T, len: usize) -> Self {
+        Self { items, len }
    }
 }

-impl<T: Default> Optional<T> {
-    pub fn none() -> Self {
-        Self::explicit_none(Default::default())
-    }
+impl<'a, T> Index<usize> for SizedArray<&'a RuntimeArray<T>> {
+    type Output=T;

-    pub fn unwrap_or_default(self) -> T {
-        self.unwrap_or(Default::default())
-    }
-}
-
-impl<T: Default> Default for Optional<T> {
-    fn default() -> Self {
-        Self::none()
-    }
-}
-
-impl<T0: Default, T1: Default> Optional<(T0, T1)> {
-    pub fn flatten((f0, f1): (Optional<T0>, Optional<T1>)) -> Self {
-        if f0.present != 0 && f1.present != 0 {
-            Optional::some((f0.unwrap(), f1.unwrap()))
-        } else {
-            Optional::none()
-        }
-    }
-}
-
-/// This struct allows doing things like *(ptr + offset) = value.
-/// Such code wouldn't ordinarily compile with the spirv target because of
-/// unsupported pointer math. RuntimeArray exists to overcome this, however
-/// it emits invalid code for non-primitive types. Hence, this hack.
-// XXX: maximum bytes an array may occupy is 0x7fff_ffff
-// We don't know the element size, so assume it to be <= 44 bytes
-// pub const MAX_UNSIZED_ARRAY: usize = 0x2e7_ffff;
-pub const MAX_UNSIZED_ARRAY: usize = 0x1ff_ffff;
-pub struct UnsizedArray<T>(pub [T; MAX_UNSIZED_ARRAY]);
-
-impl<T: Copy> UnsizedArray<T> {
-    pub unsafe fn index(&self, index: usize) -> T {
-        // *self.0.index(index)
-        self.0[index]
-        // *self.0.get_unchecked(index)
-        // &self.0
-        //asm! {
-        //    "%result = OpAccessChain _ {arr} {index}",
-        //    "OpReturnValue %result",
-        //    "%unused = OpLabel",
-        //    arr = in(reg) self,
-        //    index = in(reg) index,
-        //}
-    }
-
-    pub unsafe fn write(&mut self, index: usize, value: T) {
-        self.0[index] = value;
-    }
-}
-
-impl<T> UnsizedArray<T> {
-    pub unsafe fn index_ref(&self, index: usize) -> &T {
-        &self.0[index]
-    }
-
-    pub unsafe fn get_handle<'a>(&'a self, index: usize) -> ArrayHandle<'a, T> {
-        ArrayHandle {
-            base: self,
-            index,
-        }
-    }
-
-    pub unsafe fn get_handle_mut<'a>(&'a mut self, index: usize) -> ArrayHandleMut<'a, T> {
-        ArrayHandleMut {
-            base: self,
-            index,
-        }
-    }
-}
-
-pub struct ArrayHandle<'a, T> {
-    base: &'a UnsizedArray<T>,
-    index: usize,
-}
-
-impl<'a, T: Copy> ArrayHandle<'a, T> {
-    pub fn get(&self) -> T {
+    fn index(&self, idx: usize) -> &Self::Output {
+        debug_assert!(idx < self.len);
        unsafe {
-            self.base.index(self.index)
+            self.items.index(idx)
        }
    }
-    pub fn get_into(&self, out: &mut T) {
-        core::clone::Clone::clone_from(out, self.get_ref());
-    }
 }
-impl<'a, T> ArrayHandle<'a, T> {
-    pub fn get_ref(&self) -> &T {
+
+impl<'a, T> Index<usize> for SizedArray<&'a mut RuntimeArray<T>> {
+    type Output=T;
+
+    fn index(&self, idx: usize) -> &Self::Output {
+        debug_assert!(idx < self.len);
        unsafe {
-            self.base.index_ref(self.index)
+            self.items.index(idx)
        }
    }
 }
-
-
-pub struct ArrayHandleMut<'a, T> {
-    base: &'a mut UnsizedArray<T>,
-    index: usize,
-}
-
-impl<'a, T: Copy> ArrayHandleMut<'a, T> {
-    pub fn write(&mut self, value: T) {
+impl<'a, T> IndexMut<usize> for SizedArray<&'a mut RuntimeArray<T>> {
+    fn index_mut(&mut self, idx: usize) -> &mut Self::Output {
+        debug_assert!(idx < self.len);
        unsafe {
-            self.base.write(self.index, value);
-        }
-    }
-    pub fn get(&self) -> T {
-        unsafe {
-            self.base.index(self.index)
+            self.items.index_mut(idx)
        }
    }
 }

-/// 3d dynamically-sized array backed by a borrowed buffer.
-#[derive(Clone, Copy)]
-pub struct Array3<'a, T> {
-    data: &'a UnsizedArray<T>,
-    dim: Vec3u,
-}
-
-fn index(loc: Vec3u, dim: Vec3u) -> usize {
-    ((loc.z()*dim.y() + loc.y())*dim.x() + loc.x()) as usize
-}
-
-fn checked_index(idx: Vec3u, dim: Vec3u) -> Optional<usize> {
-    if idx.x() < dim.x() && idx.y() < dim.y() && idx.z() < dim.z() {
-        let flat_idx = index(idx, dim);
-        Optional::some(flat_idx)
-    } else {
-        Optional::none()
-    }
-}
-
-impl<'a, T> Array3<'a, T> {
-    pub fn new(data: &'a UnsizedArray<T>, dim: Vec3u) -> Self {
-        Self {
-            data,
-            dim,
-        }
-    }
-
-    pub fn index(self, idx: Vec3u) -> Optional<usize> {
-        checked_index(idx, self.dim)
-    }
-
-    pub fn into_handle(self, idx: Vec3u) -> ArrayHandle<'a, T> {
-        let idx = checked_index(idx, self.dim).unwrap();
-        unsafe {
-            self.data.get_handle(idx)
-        }
-    }
-}
-
-impl<'a, T: Copy + Default> Array3<'a, T> {
-    pub fn get(&self, idx: Vec3u) -> Optional<T> {
-        let idx = self.index(idx);
-        if idx.is_some() {
-            Optional::some(unsafe {
-                self.data.index(idx.unwrap())
-            })
-        } else {
-            Optional::none()
-        }
-    }
-}
-
-/// 3d dynamically-sized array backed by a mutably borrowed buffer.
-pub struct Array3Mut<'a, T> {
-    data: &'a mut UnsizedArray<T>,
-    dim: Vec3u,
-}
-
-impl<'a, T> Array3Mut<'a, T> {
-    pub fn new(data: &'a mut UnsizedArray<T>, dim: Vec3u) -> Self {
-        Self {
-            data,
-            dim,
-        }
-    }
-
-    // fn as_array3<'b>(&'b self) -> Array3<'b, T> {
-    //     Array3::new(self.data, self.dim)
-    // }
-
-    pub fn index(&self, idx: Vec3u) -> Optional<usize> {
-        if idx.x() < self.dim.x() && idx.y() < self.dim.y() && idx.z() < self.dim.z() {
-            let flat_idx = index(idx, self.dim);
-            Optional::some(flat_idx)
-        } else {
-            Optional::none()
-        }
-    }
-}
-
-impl<'a, T: Copy> Array3Mut<'a, T> {
-    pub fn into_mut_handle(self, idx: Vec3u) -> ArrayHandleMut<'a, T> {
-        let idx = self.index(idx).unwrap();
-        unsafe {
-            self.data.get_handle_mut(idx)
-        }
-    }
-}
-
-
-#[cfg(test)]
-mod test {
-    use super::*;
-    #[test]
-    fn test_index() {
-        let dim = Vec3u::new(2, 3, 7);
-        assert_eq!(index(Vec3u::new(0, 0, 0), dim), 0);
-        assert_eq!(index(Vec3u::new(1, 0, 0), dim), 1);
-        assert_eq!(index(Vec3u::new(0, 1, 0), dim), 2);
-        assert_eq!(index(Vec3u::new(1, 1, 0), dim), 3);
-        assert_eq!(index(Vec3u::new(0, 2, 0), dim), 4);
-        assert_eq!(index(Vec3u::new(0, 0, 1), dim), 6);
-        assert_eq!(index(Vec3u::new(1, 0, 1), dim), 7);
-        assert_eq!(index(Vec3u::new(0, 1, 1), dim), 8);
-        assert_eq!(index(Vec3u::new(1, 2, 1), dim), 11);
-        assert_eq!(index(Vec3u::new(1, 2, 2), dim), 17);
-    }
-}
--- a/crates/types/src/lib.rs
+++ b/crates/types/src/lib.rs
@@ -1,8 +0,0 @@
-#![no_std]
-#![feature(core_intrinsics)]
-
-mod compound;
-pub mod mat;
-pub mod real;
-pub mod vec;
-mod vecu;
Author	SHA1	Message	Date
colin	4361167f99	stim: strongly-type the return type of AbstractSim::at with a `Fields` struct this will help me not mix up the E and H fields.	2022-07-30 17:17:17 -07:00
colin	6a511943f7	note a few suspect areas of code	2022-07-30 17:02:10 -07:00
colin	a14625b493	meas: add SI units for some things this is an uncommon code path, apparently: only visible when rendering BEFORE serialization. may want to implement a richer meas format.	2022-07-29 23:54:02 -07:00
colin	6f0e35ea35	multi_core_inverter: add some stimuli and measurements	2022-07-29 23:53:44 -07:00
colin	7f3c2a9395	render: transform inaccurate float-based indexing into integer indexing	2022-07-29 21:53:49 -07:00
colin	349e01ba16	fix Vec3::with_mag to return an Option and thereby simplify it into just one method.	2022-07-29 21:45:25 -07:00
colin	ba6ef3c5c2	viewer: add a render mode to display just the Material	2022-07-29 16:28:58 -07:00
colin	c5e2713b51	remove unused enum_dispatch	2022-07-29 16:07:07 -07:00
colin	9c1fc65068	convert AbstractSim::sample to include a reference to the material -- not just its conductivity	2022-07-29 16:02:16 -07:00
colin	895c87869b	rename CellStateWithM => Fields; parameterize Sample over R	2022-07-29 14:55:12 -07:00
colin	7e452f508f	AbstractSim: convert the `{get,put}_material` functions to use only Index	2022-07-29 14:43:59 -07:00
colin	f4ac5de099	viewer: add docs	2022-07-29 13:57:17 -07:00
colin	e2c156e790	meas: Evaluated: fix `eval` to return both key and value	2022-07-29 13:39:33 -07:00
colin	604f368f0d	SerializeRenderer: render to `GenericSim`, not `StaticSim`	2022-07-29 13:27:05 -07:00
colin	95213e61be	real: add serialization bounds to the Real trait	2022-07-29 13:22:03 -07:00
colin	95ffb73fe3	add a `to_generic` method to the AbstractSim trait	2022-07-29 13:11:16 -07:00
colin	02920f9bd3	mat: add some missing conversion traits	2022-07-29 13:11:00 -07:00
colin	56f74e6b4a	AbstractSim: remove the Send bound	2022-07-28 23:49:45 -07:00
colin	4f2345f608	rename GenericSim -> AbstractSim	2022-07-28 23:41:42 -07:00
colin	3104c06d95	fold MaterialSim into GenericSim trait	2022-07-28 22:31:47 -07:00
colin	71ab89c4c9	de-virtualize GenericSim this should let us fold the GenericSim and MaterialSim traits together.	2022-07-28 22:22:07 -07:00
colin	2d1a15eabc	AbstractMeasurement: remove the DynClone requirement	2022-07-28 21:49:28 -07:00
colin	3722512554	AbstractMeasurement: remove the serde typetag stuff	2022-07-28 21:46:01 -07:00
colin	5c4b8d86f2	measurements: store to disk after evaluating them i'm hoping to simplify a lot of serialization code with this	2022-07-28 21:43:48 -07:00
colin	32d1a70d15	driver: remove `dyn_state`	2022-07-28 19:19:57 -07:00
colin	6206569f4a	Fold SampleableSim and MaterialSim into one	2022-07-28 16:41:32 -07:00
colin	a49d9cd7a4	sim: fold most accessors behind the `meta` method	2022-07-28 16:25:39 -07:00
colin	0465e3d7f5	sim: remove `impulse_e` methods	2022-07-28 16:17:02 -07:00
colin	afc1f874d2	sim: remove unused `impulse_b`, `impulse_h` methods	2022-07-28 15:55:50 -07:00
colin	1898542372	sim: address a TODO: `get_material` returns a reference	2022-07-28 15:45:51 -07:00
colin	45d2de29c6	rename 'coremem_types' -> 'coremem_cross' to better reflect its purpose	2022-07-28 15:40:23 -07:00
colin	9e35b29087	move `util/` out of `coremem` and into its only user: `buffer_proto5`	2022-07-28 15:30:50 -07:00
colin	80d3765bcf	fix NaN in CurlStimulus code	2022-07-28 13:54:50 -07:00
colin	62afc91879	.gitignore vim swap files	2022-07-28 13:41:30 -07:00
colin	26efc12c21	multi_core_inverter: abstractions to allow swapping out float impl and backend	2022-07-28 13:20:26 -07:00
colin	de0f3d9654	spirv: document a TODO	2022-07-28 13:18:36 -07:00
colin	07dfb9d852	spirv: add R32 support to the GPU code	2022-07-28 13:18:14 -07:00
colin	15fc7b91dc	driver: TODO: split diagnostics into their own struct	2022-07-28 02:06:34 -07:00
colin	c82aab50a2	driver: simplify `add_state_file` implementation	2022-07-28 02:04:49 -07:00
colin	33cb395584	driver: don't let the `state` be public	2022-07-28 02:01:46 -07:00
colin	917a3d3c9d	buffer_proto5: silence some warnings	2022-07-28 02:00:51 -07:00
colin	fe47eb09f8	driver: rename `new_with_state` -> `new`	2022-07-28 01:59:11 -07:00
colin	a6fb21d892	driver: remove `SpirvDriver` alias	2022-07-28 01:55:22 -07:00
colin	7a6bbf06a5	driver: remove `new_spirv` method	2022-07-28 01:52:09 -07:00
colin	50af5927df	Optional::unwrap: switch this to a debug assert	2022-07-28 01:51:39 -07:00
colin	c36d70044a	fix `benches/driver.rs` apparently `cargo build --all` doesn't include this :\|	2022-07-27 17:26:22 -07:00
colin	5a0766451d	spirv: relax some `: 'static` bounds	2022-07-27 17:11:10 -07:00
colin	9e07189b12	SpirvSim: don't always require the backend during construction	2022-07-27 17:04:43 -07:00
colin	920a0b3c9a	spirv backend: remove parameters from WgpuBackend struct	2022-07-27 16:50:51 -07:00
colin	48e8f5d1b4	bench: explicitly specify spirv backend	2022-07-27 16:37:56 -07:00
colin	d5c4e13b84	driver: remove `legacy` uses	2022-07-27 16:34:50 -07:00
colin	1dd6a068ba	replace 'StaticSim' with the SpirvSim type, material being the Vacuum	2022-07-27 16:22:32 -07:00
colin	dc38457a8b	don't re-export StaticSim from sim/mod.rs this way we can clearly spot the legacy users.	2022-07-27 15:42:18 -07:00
colin	93967485f0	spirv: remove the `set_meta` method on the SimBackend backend is responsible for procuring its own resources on the first run.	2022-07-27 14:05:17 -07:00
colin	932bb163c3	SpirvSim: explicitly pass the backend in when initialized	2022-07-27 13:53:32 -07:00
colin	7698e0e5ba	spirv: re-order the SimBackend parameters to be more consistent	2022-07-27 12:47:51 -07:00
colin	6b51fcea02	spirv: cpu: inline most of this step logic	2022-07-27 12:42:44 -07:00
colin	d0afca7e3f	spirv: gpu: simplify some of this entry_point passing	2022-07-27 12:39:15 -07:00
colin	b134fd2373	types: Optional: remove the Into/From<Option> impls they're no longer used	2022-07-27 12:34:32 -07:00
colin	568d61c598	spirv: remove the Optionality around entry points: compute them statically with traits	2022-07-27 12:32:43 -07:00
colin	baaeeb9463	spirv_backend: no need to re-export glam	2022-07-27 12:13:01 -07:00
colin	4bb0bc09ad	spirv/cpu.rs: remove unused import	2022-07-27 12:08:31 -07:00
colin	c85bee20f5	replace some assert's with debug_assert's; slightly more optimal Optional impls	2022-07-27 12:07:30 -07:00
colin	f6a585852e	move the dimensioned operations out of the sim adapters and into step.rs	2022-07-26 18:43:41 -07:00
colin	7d16e87b6e	spirv: port all backends to use R for the stimulus particularly, this patches over a difference where the gpu backend expected the stimulus to be R, while the CPU thought it should be f32. that would likely have revealed a crash if we had tested it with f64 (TODO).	2022-07-26 18:16:10 -07:00
colin	00dcfb170a	spirv_backend/support.rs: remove the re-export of `DimensionedSlice` also add some docs	2022-07-26 18:08:03 -07:00
colin	dbd666d272	move the dimensioned indexing out of spirv_backend and into coremem_types this allows us to use it from the CPU implementation.	2022-07-26 18:03:21 -07:00
colin	d93d14d260	spirv_backend: use RuntimeArray to remove all this UnsizedArray stuff	2022-07-26 15:58:23 -07:00
colin	09f7c8acb9	spirv_backend: support: remove unused helpers	2022-07-26 13:36:42 -07:00
colin	6e4133db4d	spirv backend: simplify the adapt.rs indexing by using the constructors previously created	2022-07-26 13:29:39 -07:00
colin	68d8cdde42	move some of the VolumeSample instantiation into step.rs, out of cpu.rs we can go further: the IndexDim type itself can be moved into step.rs -- maybe? if it were to wrap a generic flat-indexable thing -- either a slice, or an array ref.	2022-07-26 01:15:53 -07:00
colin	92ab220110	spirv: test: remove legacy cpu-only tests these tests are all covered by the backend-agnostic tests	2022-07-26 00:54:33 -07:00
colin	972e0ba4fb	spirv: test: add TODO for moving the cpu tests to be backend-agnostic	2022-07-25 22:38:10 -07:00
colin	d68c1b20be	spirv: test: port the last rgpu test to be backend-agnostic	2022-07-25 22:37:05 -07:00
colin	a969969449	spirv: test: port mh_ferromagnet tests to backend-agnostic	2022-07-25 22:36:46 -07:00
colin	04c6d05ab0	spirv: test: port mb_ferromagnet tests to be backend-agnostic	2022-07-25 22:28:43 -07:00
colin	dc49cddc97	spirv: test: port conductor tests to backend_agnostic	2022-07-25 22:23:56 -07:00
colin	3fa2c22438	spirv: test: port RngStimulus tests to both backends	2022-07-25 22:19:44 -07:00
colin	a8be7279b3	spirv sim: port rgpu smoke tests to test both Gpu and Cpu backend generically	2022-07-25 21:52:08 -07:00
colin	fee9a1c216	implement a CpuBackend for running the "spirv" simulations UNTESTED	2022-07-25 17:58:22 -07:00
colin	47e11474d2	parameterize SpirvSim over R: Real	2022-07-25 14:49:32 -07:00
colin	a1784da1cf	spirv: parameterize over the SimBackend	2022-07-25 14:27:09 -07:00
colin	b4ee42cfdf	spirv: rename WgpuData -> WgpuBackend	2022-07-25 14:11:58 -07:00
colin	cf42ec2dd1	spirv: SimBackend: remove the Array3 use	2022-07-25 14:11:01 -07:00
colin	567f088f98	spirv: hide the gpu ops behind a SimBackend trait	2022-07-25 13:59:28 -07:00
colin	ff1d9867ab	parameterize WgpuData over the M type	2022-07-25 13:15:41 -07:00
colin	0801a0dca3	spirv: remove `bindings.rs` the one function which was in here previously is just inlined into gpu.rs	2022-07-25 13:07:35 -07:00
colin	7cf8ed9a7b	spirv: gpu.rs no longer references the super SpirvSim type	2022-07-25 13:06:00 -07:00
colin	8c8e707407	spirv: move the stimulus application out of gpu.rs	2022-07-25 12:52:35 -07:00
colin	5b8978f0ec	spirv: instantiate the backend in mod.rs, not gpu.rs	2022-07-25 12:47:39 -07:00
colin	bd066331de	spirv: fix indendation	2022-07-25 12:45:07 -07:00
colin	cfbd5547cb	spirv: move indexable check into the gpu.rs backend	2022-07-25 12:44:02 -07:00
colin	d1765554fc	spirv/gpu.rs: don't hard-code Vec3<f32> size in the future this may become parameterized	2022-07-25 12:36:50 -07:00
colin	38a47a0054	split most of the GPU spirv sim stuff into its own file	2022-07-25 12:29:45 -07:00
colin	2032e90688	spirv_bindings: remove the IntoFfi/FromFfi stuff	2022-07-25 00:55:43 -07:00
colin	8a16a5ce30	lift SimMeta from spirv_backend -> coremem_types	2022-07-25 00:52:11 -07:00
colin	15aaa3e893	move spirv_backend/sim.rs -> coremem_types/step.rs	2022-07-25 00:40:27 -07:00
colin	5fec965549	Optional: derive fmt and serde traits based on feature flag	2022-07-25 00:35:57 -07:00
colin	5490634fe7	move `Optional` out of spirv_backend and into coremem_types	2022-07-25 00:35:04 -07:00
colin	9c7ef7ec88	spirv_backend: split the array operations out of sim.rs -> adapt.rs	2022-07-25 00:28:03 -07:00
colin	8ab89640f2	spirv_backend: split out some of the spirv entry point adapters into `adapt.rs`	2022-07-25 00:21:20 -07:00
colin	ebd2762d7a	spirv: sim: adjust so `Step{E,H}Context` does not use `ArrayHandle` the specific way to accomplish this is touchy. see <https://github.com/EmbarkStudios/rust-gpu/issues/312#issuecomment-738824131>: > So I'd say placing any of the spirv_std::storage_class types into an aggregate (including capturing it in a closure) is unsupported for now in our specific case, we can't return a tuple where one element is a `&` to a spirv Input, and another element is a `&mut` to a spirv Output. when we have a struct, it can enclose either ONLY inputs, or ONLY outputs -- not a mix. i'm not 100% on how the Serialized stuff works, since it appears to violate that. i guess that's exactly what this ArrayHandle stuff achieves though.	2022-07-24 22:57:41 -07:00
colin	05f5f75dd3	spirv: remove the ArrayHandleMut artifacts in `Step{H,E}Context` this will make it easier to reuse these blocks on the CPU side.	2022-07-24 22:17:44 -07:00
colin	b70cafa205	spirv support: fix an overly-constrained lifetime parameter in the array index fn	2022-07-24 21:51:20 -07:00