644 Commits

Author SHA1 Message Date
f8ae96ef13 README: clarify how to implement _compound_ materials 2025-01-15 02:04:14 +00:00
641dd9a5a1 README: fix typos, update code paths 2025-01-15 01:51:46 +00:00
b81ea7fa5a flake: rust-overlay: 2024-08-01 -> 2025-01-11 2025-01-15 01:47:30 +00:00
518e0821df update deps: rust-gpu: 86d60422 -> d78c3017 and associated rust-toolchain: nightly-2022-12-18 -> nightly-2023-01-21 2025-01-15 01:47:30 +00:00
96932ddb64 update deps: rust-gpu: dcf37b75 -> 86d60422 and associated rust-toolchain: nightly-2022-09-25 -> nightly-2022-12-18 2025-01-15 01:47:30 +00:00
293ba76b1f docs: disable doctesting for docs which aren't rust code (or for which the code can't be invoked publicly) 2025-01-15 01:47:30 +00:00
dab5e42d1a update deps: rust-gpu: 985007fc -> dcf37b75, associated rust-toolchain: nightly-2022-08-29 -> nightly-2022-09-25
verified working: cargo test; cargo run --release --bin sr_latch; cargo run --release --bin wavefront
2025-01-15 01:47:30 +00:00
dc201549c1 Rust-GPU: pin to 985007fc, until i can figure how to bump it 2025-01-15 01:47:30 +00:00
74f1f3b232 spirv_backend_builder: un-pin compiler internals (so far, they seem to be not required?)
`cargo test` passes, as does `cargo run --release --bin sr_latch`
2025-01-15 01:47:30 +00:00
781aff3935 migrate rust-gpu from EmbarkStudios -> Rust-GPU project/namespace 2025-01-15 01:47:30 +00:00
6aec4aa4fe flake: update: nixpkgs: 23.05 -> 23.11
`cargo test` passes (excluding doc tests, but including wgpu tests!)
2025-01-15 01:47:30 +00:00
f688d49511 flake: rust-overlay: 2023-12-30 -> 2024-06-27
`cargo test` passes (except wgpu tests, doc tests)
2025-01-15 01:47:30 +00:00
970201bfa1 flake: update: nixpkgs: 22.11 -> 23.05
`cargo test` passes (except wgpu tests, doc tests)
2025-01-15 01:47:30 +00:00
daf8e9dba8 flake: update: rust-overlay: 2023-06-30 -> 2023-12-30
`cargo test` passes (except wgpu tests, doc tests)
2025-01-15 01:47:30 +00:00
e9ce42a2d2 flake: update: rust-overlay 2022-12-31 -> 2023-06-30
`cargo test` passes (except wgpu tests, doc tests)
2025-01-15 01:47:30 +00:00
47604bbf36 flake: update rust-overlay 2022-09-25 -> 2022-12-31
`cargo test` passes (except wgpu tests, doc tests)
2025-01-15 01:47:30 +00:00
3fede45bdb nixpkgs: 22.05 -> 22.11
`cargo test` passes (except wgpu tests, doc tests)
2025-01-15 01:47:30 +00:00
61b55b4ad5 flake: update
`cargo test` passes (except wgpu tests, doc tests)
2025-01-15 01:47:30 +00:00
cc42faeef4 flake: lock rust-overlay to 45140fa526b1cb85498f717e355c79a54367cb1d (until i figure how to update it)
`cargo test` passes (except wgpu tests, doc tests)
2025-01-15 01:47:30 +00:00
4cc46ae71a README: make this file-path a link 2022-12-07 10:00:15 +00:00
beb43843ff app: sr_latch: fix doc-comment to have file scope 2022-12-07 09:58:03 +00:00
c06aa7ff36 README: update toolchain docs to match what we actually use 2022-12-07 09:56:36 +00:00
bcefd46105 nit: use consistent syntax in material docs 2022-12-07 09:49:06 +00:00
859a7f8b18 rename FullyGenericMaterial -> GenericMaterial
this naming was an artifact from the separate CPU/GPU material implementations.
2022-12-07 09:46:33 +00:00
6d73150fb6 README: fix up stale paths, material references 2022-12-07 09:41:38 +00:00
ed55cdfe10 app: stacked_cores: 61-xx: complete more experiments; start ones with only symmetric coupling 2022-11-26 03:42:10 +00:00
1c9527bb63 app: stacked cores: 61-xx: complete more runs 2022-11-24 11:13:53 +00:00
06aaf55e30 app: stacked_cores: 61-xx: more experiments 2022-11-22 12:56:01 +00:00
0d62b60423 app: stacked_cores: 61-xx: complete more runs 2022-11-22 00:25:52 +00:00
7bb8199b02 app: stacked_cores: 61-xx: complete more runs
still nothing with >> 1.0x amplification.
though we do see configurations which might *locally* amplify:
- 2:1 input coupling and 4:1 output coupling
        - output is amplified relative to the middle cores
        - but the middle cores transition less than fully
2022-11-19 11:18:24 +00:00
cca6a7c8cd app: stacked_cores: complete more 61-xx runs 2022-11-18 23:54:34 +00:00
9cb9c4dd66 app: stacked_cores: 61-xx: complete a few first-pass runs over an alternatively-parameterized complementary buffer 2022-11-18 10:20:49 +00:00
da199568ff app: stacked_cores: 53-xx: do another run with greater asymmetry
5:1 does worse than 3:1 here
2022-11-18 02:47:09 +00:00
eccd865cf7 app: stacked_cores: 60-xx: new experiment that tries moving a value along a 4-core loop
it does this in a non-complementary way; and it doesn't get more than
about 0.60 amplification
2022-11-17 23:28:49 +00:00
e13ddbdc1f app: stacked_cores: complete more 59-xx runs 2022-11-17 09:45:19 +00:00
38aa677aad app: stacked_cores: complete some 59-xx runs
they don't look super insightful/promising.
2022-11-17 01:17:42 +00:00
a2a851b26f app: stacked_cores: 58-xx: try merging cores via a complementary buffer
results aren't any better than the earlier complementary buffers
2022-11-16 12:29:27 +00:00
9d4e245388 app: stacked_cores: new 58-xx sim which tries a complementary buffer into a pre-charged output 2022-11-11 22:59:57 +00:00
6e198caaa3 fix "reset" -> "set" typo in SR latch example 2022-11-11 05:28:54 +00:00
b7112fab86 app: stacked_cores: 57xx: do some runs where only one pos core is wired
into the output
2022-11-11 03:35:24 +00:00
ea6799b764 app: stacked_cores: new 57-xx experiment: complementary buffer with doubled inputs 2022-11-10 01:23:02 +00:00
4539cb18fe app: stacked_cores: 56-xx: complete a few more runs 2022-11-09 03:36:04 +00:00
7443599054 app: stacked_cores: new 56-xx sim for complementary logic using multiple input cores
like 53-xx, but with double the input cores, and a fixed 1:1 coupling.
it achieves 0.9x amplification at best.
- which is *better* than the 0.8 amplification we see with 53-xx when
  using 1:1 coupling, but not enough
- what if we try this idea with 3:1 winding? we can do that if we
  sandwich each output _between_ its associated input.
2022-11-09 01:20:15 +00:00
df68100f82 app: stacked_cores: define a fork -> join sim
this is like 18xx, but better measured & with better control/coupling
wirings.

so far we don't have anything > 1.0x amp, but closer to 0.75x
2022-11-08 09:23:28 +00:00
be172d4371 remove the 'license' section
no need to state my ideals so in-your-face here. better to just omit any talk
of licensing, if i truly believe it to be irrelevant.
2022-11-07 03:19:53 -08:00
4407a8d3f7 app: stacked_cores: 53-xx: complete some more runs, including one where inputs are uncoupled 2022-11-07 02:46:24 -08:00
16525127a1 app: stacked_cores: 53-xx: complete a run which uses pos-windings != neg-windings 2022-11-05 18:54:10 -07:00
af4b5ffa32 app: stacked_cores: 53-xx: complete a 1:1 coupled buffer
slope is poor, hovering around a constant 0.75 transmission ratio.
2022-11-05 02:59:10 -07:00
1742172e6c app: stacked_cores: 53-xx: add a 5:1 buffer
it seems to under-transfer compared to the 3:1 buffers.
this *might* be an issue of drive current -- unclear.
2022-11-04 06:11:08 -07:00
3ebcc550a0 app: stacked_cores: 53-xx: better constrain the interpolation, and plot slope 2022-11-04 06:10:04 -07:00
373c80793f app: stacked_cores: improve the 52-xx plotting/interpolation
it's a little slow :-(
i'd guess the `score` function is the slowest part. can maybe get scipy
to do a dot-product for us?
2022-11-04 03:22:42 -07:00
df828b6299 app: stacked_cores: create a plot_53xx and refactor the surroundings
note that the interpolation is very BAD. i need to figure out better
sampling.
2022-11-03 21:21:07 -07:00
4023a67912 app: stacked_cores: ingest 53-xx buffer results 2022-11-03 20:26:17 -07:00
aee3796c29 app: stacked_cores: note about preservation 2022-11-03 05:46:58 -07:00
a45c0c4324 app: stacked_cores: new 53-xx run, where we buffer differential signals 2022-11-03 05:42:38 -07:00
47189dcc7e app: stacked_cores: 52-xx: complete more or gate parameterizations 2022-11-03 04:39:06 -07:00
70e14b4578 app: stacked_cores: 52-xx: sort the runs naturally (natsort) 2022-11-03 01:09:12 -07:00
a754b6e01d app: stacked_cores: 52-xx: capture more runs of existing or gates 2022-11-03 01:02:33 -07:00
286a267f75 app: stacked_cores: 52-xx: add some facilities for plotting 52-xx or gate runs
it's primitive; not the *best* results
2022-11-02 17:21:29 -07:00
ff68e57fa5 app: stacked_cores: 52-xx: collect the measurements into a db 2022-11-02 15:42:49 -07:00
87366cf473 app: stacked_cores: define an "or gate" sim + script for post-processing
extract transfer characteristics with e.g.
```
extract_meas.py ../../../out/applications/stacked_cores/52-or--0.0004rad-5000ctl_cond-20000coupling_cond-2000ps-100ps-3ctl-3coupling-3_1_winding-49999998976e0-drive- 2e-9 4e-9 8e-9
```
2022-11-01 00:11:50 -07:00
9d2fbf8b07 app: stacked_cores: expand the 48-xx run set 2022-10-31 20:49:11 -07:00
267a204e7e app: stacked_cores: complete some more 51-xx runs with variable winding ratios 2022-10-29 01:05:51 -07:00
fc0ce9f083 app: stacked_cores: 51-xx: try some higher-current variants; schedule some 5:1 and 7:1 inverter runs 2022-10-28 05:18:18 -07:00
0c7df48234 app: stacked_cores: 51-xx: complete some experiments using single-clock cascaded cores
i'm able to get 0.8x amplification between the first and the third core.
this is *less* than the amplification i got when cascading only one core
of the first, so not likely a good direction to pursue, though i haven't
yet explored that much of the parameter space.
2022-10-28 02:43:12 -07:00
12f286c3c7 app: stacked_cores: prototype a 3-core/1-cycle inverter (51-xx)
we vary the conductivities, as with 50-xx. the hope is that with a
multi-core approach like this we might get >> 1.0x amplification in the
unloaded setup, which we can place into a loaded circuit and deal with
the ~70% loading penalty.
2022-10-27 18:31:39 -07:00
9c17d3b45d app: stacked_cores: conclude 50-xx runs 2022-10-27 18:17:11 -07:00
57e12cbe32 app: stacked_cores: 50-xx: explore some more runs
got the amplification up to a bit over 0.3...
2022-10-26 08:08:15 -07:00
6af2d1d9e3 app: stacked_cores: complete some more 50-xx runs 2022-10-25 15:29:33 -07:00
cba2db6b10 app: stacked_cores: 50-xx: complete a run with high *control* conductivity, and schedule a few more 2022-10-25 05:37:54 -07:00
f4f672aab6 app: stacked_cores: fix 49-xx (now 50-xx) and run a few parameterizations 2022-10-25 03:58:19 -07:00
3f54b25cf1 app: stacked_cores: define 49-xx: a *typo'd* multi-stage inverter with parameterized conductivities
in fact, M2 is initialized improperly: this actually acts as an
(overpowered) single-clock-cycle inverter.
2022-10-24 21:49:10 -07:00
3e331db374 app: stacked_cores: 48-xx: grab more detailed measurements for recent inverters 2022-10-24 06:53:03 -07:00
87e94d2182 app: stacked_cores: enable 0.001-level precision for current setting 2022-10-24 02:40:58 -07:00
21d41ff3d5 app: stacked_cores: 48-xx: run another 2e10 I parameterization 2022-10-24 02:37:05 -07:00
e526289fe9 app: stacked_cores: 48-xx: test the high-side of current for an already successful run 2022-10-24 00:26:37 -07:00
0e3212e624 app: stacked_cores: 48-xx: try a few more 10ns, 5e4 coupling cond runs 2022-10-23 21:04:43 -07:00
2b8c5d45c2 app: stacked_cores: finish a lower-current variant of the 48-xx 5e2/4e4 conductivity run 2022-10-22 08:05:13 -07:00
e1867ee541 app: stacked_cores: 48-xx: complete a very low control-conductivity run (2e2) 2022-10-22 05:37:34 -07:00
816d7edc38 app: stacked_cores: 48-xx: complete a few more runs with varied conductivity ratios 2022-10-22 01:24:18 -07:00
32c643ef13 app: stacked_cores: 48-xx: complete runs for 5e2/4e4 ctrl/coupling run
high slope (1.70) over a narrow domain
2022-10-21 20:33:51 -07:00
8a8823ffd8 app: stacked_cores: more 48-xx runs where we vary the coupling conductivity separate from the control conductivity 2022-10-21 19:19:55 -07:00
75a88887f0 app: stacked_cores: 48-xx: simulate a few more variants
got one with a 1.4x slope at the start.
that's novel across all inverters i've simulated to-date.
2022-10-21 09:54:20 -07:00
3dbdead1cb app: stacked_cores: 48-xx: complete a few more runs 2022-10-21 05:13:28 -07:00
daf50324d7 app: stacked_cores: complete more 48-xx runs 2022-10-21 01:00:46 -07:00
6f57e17bef app: stacked_cores: 48-xx: add some runs 2022-10-17 06:51:48 -07:00
7c0151220c app: stacked_cores: new 48-xx sim which varies conductivities on a 2-core buffer 2022-10-17 04:32:30 -07:00
ee74163131 app: stacked_cores: complete a few runs of 46-xx where the output is floating
this shows us that most of the load preventing M1 from switching is due
to us holding its *downstream* core steady.

if we could somehow make it so that the downstream core presented a
lower load to M1, then we could hold it steady while writing M0 -> M1.

this is similar to saying "make M0 -> M1 a circuit that amplifies A >> 1
and make M1 -> M2 a 1:1 circuit". then we can hold M2 low and still get
amplification A - 1.

then the question is how do we get A >> 1?
2022-10-17 03:40:03 -07:00
760dd0070f app: stacked_cores: complete a few more 46-xx runs 2022-10-16 23:18:33 -07:00
ff2c79162c app: stacked_cores: 47-xx: cascade two buffers and vary their parameterization 2022-10-16 17:21:10 -07:00
c458b3135b app: stacked_cores: fix flipped 41-xx measurements 2022-10-16 06:02:13 -07:00
e8adf6eaa7 app: stacked_cores: include intermediate core values in the db for multi-core inverters 2022-10-16 05:20:55 -07:00
3498649312 42-xx: try some > 400um inverters 2022-10-16 04:58:00 -07:00
7ecd8fa881 app: stacked_cores: backfill some 40-xx parameterizations 2022-10-16 04:30:49 -07:00
226e4949d0 app: stacked_cores: minimize what we extrapolate from beyond the measured transfer domain 2022-10-16 04:28:44 -07:00
74858ee247 app: stacked_cores: add aliases for poorly formatted f32 strings 2022-10-16 02:29:31 -07:00
3614d00871 app: stacked_cores: sort all the inverters in the db 2022-10-16 02:12:47 -07:00
bc61fd0d0a app: stacked_cores: 46-xx: complete some runs of an inverter cascaded into a buffer
the results aren't great :'(
2022-10-16 02:00:55 -07:00
33b0b76278 app: stacked_cores: plot what happens when one cascades an inverter into a buffer 2022-10-15 23:26:10 -07:00
d03818b58e app: stacked_cores: try varying the number of control loops separately from the coupling loops
doesn't make a huge difference, apparently.
2022-10-15 21:45:37 -07:00
3a21cf7655 app: stacked_cores: try a 3-core inverter where the 3rd core is initialized LOW
theory being that this would place less load on the intermediary core,
allowing it to transition more. but that wasn't actually the case.
2022-10-15 07:45:14 -07:00
8a3914d56d app: stacked_cores: factor out the inverter wiring setup 2022-10-14 20:11:58 -07:00
5a61613381 app: stacked_cores: 43-xx: complete more current variations 2022-10-14 19:25:05 -07:00
997ac5f299 app: stacked_cores: 43-xx: complete some 600um runs 2022-10-14 08:18:38 -07:00
8407c2c8e8 app: stacked_cores: 43-xx: run more current variations 2022-10-13 21:53:30 -07:00
196e6c8790 app: stacked_cores: 43-xx: run a few 5x 3:1 current variations 2022-10-13 19:22:58 -07:00
b07da366f1 app: stacked_cores: 43-xx: ingest results 2022-10-13 17:27:16 -07:00
f4d637fc98 app: stacked_cores: new 43-xx experiment where we cascade two asymmetrically-wound inverters 2022-10-12 07:39:02 -07:00
1cfebb73e0 app: stacked_cores: complete a few more 42-xx runs 2022-10-12 03:42:25 -07:00
0bf7b379d6 app: stacked_cores: explore more 4x 7:1 parameterizations 2022-10-11 23:27:40 -07:00
2f097ab1a8 app: stacked_cores: 42-xx: explore more 9x 3:1 parameterizations 2022-10-11 21:26:17 -07:00
f4b21afe58 app: stacked_cores: 42-xx: explore 6x 5:1 parameterizations 2022-10-11 20:25:25 -07:00
0c079585b0 app: stacked_cores: 42-xx: explore some more > 3:1 runs 2022-10-11 18:46:40 -07:00
c6814796e1 app: stacked_cores: 42-xx: conclude a 3e10 drive variant of the 2x 13:1 inverter 2022-10-11 07:25:46 -07:00
09ea393417 app: stacked_cores: 42-xx: run a 2x 13:1 experiment at 2e10 current 2022-10-11 04:32:37 -07:00
e76fd7f045 app: stacked_cores: 42-xx: re-measure 400um 4x 7:1 at 1e10 coupling 2022-10-11 03:29:31 -07:00
ff203011df app: stacked_cores: 42-xx: explore more runs of the low-current 400um 9x 3:1 parameterization 2022-10-11 02:11:30 -07:00
348042ca00 app: stacked_cores: 42-xx: complete more runs 2022-10-10 22:49:46 -07:00
bab747b97b app: stacked_cores: 42-xx: complete some runs
not all the "inverters" from 41-xx lend themselves to actual, native
inverters when natively inverted.
2022-10-10 16:45:44 -07:00
197c1ca30d app: stacked_cores: complete the first 42-xx inverter run 2022-10-10 06:48:27 -07:00
d8eeecfa4e app: stacked_cores: new grouping: 42-xx: test a native inverter 2022-10-10 05:15:48 -07:00
ff88b18473 Intersection: add a new3 constructor 2022-10-10 04:25:23 -07:00
3e32526099 app: stacked_cores: complete a 400um 9x 3:1 run at 12e9 drive strength 2022-10-10 02:50:50 -07:00
1069f63255 app: stacked_cores: try another 400um 9x 3:1 run with higher current
also completed a bunch more detail for adjacent inverters.
2022-10-09 16:59:24 -07:00
c0e2b1ba6c app: stacked_cores: try a 8e9 drive strength variant of the 400um 3:1 inverter 2022-10-09 06:21:44 -07:00
7150d4c8b3 app: stacked_cores: test some variants of the 400um 6x 5:1 core 2022-10-09 04:56:11 -07:00
8b3b638de1 app: stacked_cores: take more readings for the 400um 5:1 41-xx run 2022-10-09 04:25:05 -07:00
19bf9e2d31 app: stacked_cores: try a 41-xx 400um 4x 7:1 run at 4e10 drive strength 2022-10-09 03:34:31 -07:00
d5f2c75ec7 app: stacked_cores: complete more 41-xx runs of the validated inverters 2022-10-07 14:48:30 -07:00
12d0737c6b app: stacked_cores: 41-xx: finish more runs of the 1200um 3:1 inverter 2022-10-07 03:20:20 -07:00
7b2bb56e7a app: stacked_cores: 41-xx: finish the 1200 um 5:1 inverter 2022-10-06 21:47:28 -07:00
972db0d45f app: stacked_cores: mark (36, 1, um(1200), 4e9) as not a viable inverter 2022-10-06 16:00:22 -07:00
2f9110d858 app: stacked_cores: confirm another inverter: 41-0.0011999999rad-24coupling-5_1_winding-1e10-drive 2022-10-06 03:45:08 -07:00
269a5f979b app: stacked_cores: 41-xx: more db entries for 1:1 coupling
- `41-0.0004rad-18coupling-1_1_winding-1e10-drive`
- `41-0.0004rad-18coupling-1_1_winding-5e9-drive`
2022-10-05 19:31:49 -07:00
2ab3bf39ed app: stacked cores: 41-xx: try non-asymmetric wrapping between cores 2022-10-05 15:37:28 -07:00
159652e1d6 app: stacked_cores: 41-xx: launch a 1200um 5:1 run with higher drive current 2022-10-05 15:30:00 -07:00
aeaed7aba3 app: stacked_cores: 41-xx: record working 1200um inverter; try again with 4e9 I 2022-10-05 15:26:09 -07:00
1d9d3659b8 app: stacked_cores: 41-xx: record new runs
- `41-0.0008rad-24coupling-3_1_winding-3e9-drive`
- `41-0.0008rad-16coupling-5_1_winding-5e9-drive`
- `41-0.0011999999rad-24coupling-5_1_winding-5e9-drive`
2022-10-05 15:16:32 -07:00
0739749982 app: stacked_cores: 41-xx: record some 1200um runs 2022-10-05 00:07:05 -07:00
0de33a33ce app: stacked_cores: 41-xx: try a 1200um 5:1 run 2022-10-04 15:56:37 -07:00
adfa4b1e78 app: stacked_cores: 41-xx: try a 1200um run 2022-10-04 15:54:48 -07:00
6d8e9d050f app: stacked_cores: 41-xx: complete start of 800um 16x 5:1 5e9 I sim, and remove it
too low transfer at logic low.

also, add a tool to analyze inverters without plotting them
2022-10-04 15:52:13 -07:00
c8bf2053ef app: stacked_cores: 41-xx establish a working 800um 3:1 inverter; start on a 800um 5:1 inverter 2022-10-04 15:19:07 -07:00
726e60061f app: stacked cores: tackle more interesting parameterizations sooner 2022-10-04 04:58:30 -07:00
82d264045c app: stacked_cores: 41-xx: try to find more 800um inverters 2022-10-04 04:39:50 -07:00
807ae68523 app: stacked_cores: wrap up 800um 10x 7:1 1e10 I run 2022-10-04 03:20:37 -07:00
ea69807a90 app: stacked_cores: 41-xx: verdict on 600um 18x 3:1 2e10 I 2022-10-04 02:43:42 -07:00
427bb1ec22 app: stacked_cores: 41-xx: mark some completed runs; prototype a 800um 3:1 run 2022-10-04 01:44:53 -07:00
98a7815cd7 enumerated: improve docs 2022-10-04 01:29:04 -07:00
ab01d8eff0 app: stacked_cores: 41-xx: mark which inverters from the last batch were good/bad 2022-10-04 00:22:58 -07:00
b869de6d91 app: stacked-cores: save the 600 um 18x 3:1 runs 2022-10-04 00:09:04 -07:00
0786481b79 app: stacked_cores: 41-xx: define a few more inverters to try 2022-10-03 04:32:59 -07:00
e62d4d066c app: stacked_cores: 40-xx: document more runs 2022-10-03 04:28:30 -07:00
8974282a9d sim: backfill a test to show that conductors properly reflect EM waves 2022-10-03 04:21:43 -07:00
ecfdf5e322 sim: tests: split the ray_propagation test into smaller helpers
these might be useful for other future tests as well.
2022-10-03 03:06:40 -07:00
72d6d017a6 app: stacked cores: 40-xx: complete a few more runs 2022-10-03 02:34:26 -07:00
c9c2f11ec8 app: stacked_cores: 40xx: vary the current on 18x 3:1 600um 2022-10-03 01:45:54 -07:00
b8a7cc54e2 app: stacked_cores: 41-xx: define 2 more sims (600um) 2022-10-02 23:06:38 -07:00
1a156203b7 app: stacked_cores: 40-xx: update db for the 400um 9x 3:1 5e9 I run
it looks like it's not a viable inverter
2022-10-02 22:47:29 -07:00
0d82cf414e app: stacked_cores: 41-xx: gather more samples to demonstrate ineffectiveness of (9, 1, um(400), 2e10) 2022-10-02 17:16:51 -07:00
4800a1b625 app: stacked cores: 40-xx: record more completed runs 2022-10-02 16:57:51 -07:00
c9dd27f741 mb_pgram: better docs 2022-10-02 04:39:22 -07:00
ae3ac2717b app: stacked_cores: 40-xx: define another 400um inverter to test 2022-10-02 04:11:46 -07:00
6c43506a3e app: stacked cores: 44-xx: document newly completed runs; define next parameters 2022-10-02 03:44:57 -07:00
600314d5af delete unnecessary regression tests.
`mb_ferromagnet_50_steps_larger` provides coverage for valid magnetic
devices.
2022-10-02 03:29:17 -07:00
4ffbc0b8af fix broken tests:
- mb_ferromagnet_diff_repro
- mb_ferromagnet_diff_minimal_repro

as the comment hinted:
> these tests probably failed earlier because they were allowing negative mu_r values.
> no (ordinary?) material has a negative permeability.
> most materials (except superconductors) have >= 1.0 relative permeability
> permeability = mu = B/H (or, \Delta B/\Delta H)

in fact, the relative permeability was -0.56.
it's now 1.39
2022-10-02 03:27:02 -07:00
d4a59b8944 app: stacked-cores: 40xx: sort db 2022-10-01 23:50:18 -07:00
9a7591c18e app: stacked-cores: 40xx: ingest new sim runs; start next batch 2022-10-01 23:49:51 -07:00
2ac34f0753 app: stacked_cores: 41-xx: start some new sims based on findings
higher current seems to _decrease_ tx at x=0, generally a good thing.
2022-10-01 16:48:21 -07:00
8484ab7de5 app: stacked cores: complete some 41-xx runs 2022-10-01 16:42:43 -07:00
8c9e02a77f app: stacked_cores: try adding multiple control loops 2022-10-01 04:35:45 -07:00
2353eb531c app: stacked cores: record 1200um results 2022-09-30 23:00:09 -07:00
ef40a8f0f3 app: stacked_cores: ingest a few 1200um results 2022-09-30 06:01:38 -07:00
ea3bc50af2 app: stacked_cores: ingest new results; define next sims 2022-09-30 03:17:14 -07:00
a60bc69403 app: stacked-cores: rearrange/order sims and define some new ones 2022-09-29 18:15:12 -07:00
3bed385cae app: stacked-cores: plot specific cases, like only the viable inverters 2022-09-29 17:15:43 -07:00
5286339413 app: stacked-cores: 40xx db: preserve parameterization in more context
i want to add some filtering functions to the db lookups, and this will
facilitate that
2022-09-29 16:33:44 -07:00
765022639e app: stacked-cores: auto-generate all the names in the 40-xx database 2022-09-29 16:24:47 -07:00
162e9630ad try dumb vertical scaling of inverters
it seems that if we take a non-inverter that has y(0) *close* to 0, and
scale it enough, then we get stable transfer.

this suggests we really just want something with a massive number of
couplings (to keep the coupling ideal) and enough asymmetric windings to
get us > 1.0 tx ratio over some range.
2022-09-29 16:17:48 -07:00
83bd15673d app: stacked cores: record more 40-xx runs
notably, these are entirely new:
`40-0.0008rad-18coupling-5_1_winding-5e10-drive-xx`
2022-09-29 15:09:52 -07:00
2eb714ff74 app: stacked_cores: ingest more runs
the larger cores may indeed be doing better (a little early to tell).
the tendency right now is that too much transfer occurs too early,
such that the region of > 1.0 slope maps _outside_ that region, not
allowing for an inverter to work well.
2022-09-28 15:51:05 -07:00
710e113108 app: stacked_cores: extract values from completed 800um sims 2022-09-28 01:10:04 -07:00
e0f9893b0e app: stacked_cores: explore the 800um cores 2022-09-27 18:21:55 -07:00
54f5a162a4 app: stacked_cores: plot more inverters 2022-09-27 18:21:33 -07:00
091d6f76c8 app: stacked_cores: update measurements for in-progress sims 2022-09-27 18:21:11 -07:00
7aa10f78a3 app: stacked_cores: 40xx_db: define more sims 2022-09-27 17:36:17 -07:00
975fbd7832 app: stacked_cores: record more 40-xx runs 2022-09-27 17:30:49 -07:00
9b93a762f1 app: stacked_cores: allow 40xx_db.py to update itself 2022-09-27 17:15:54 -07:00
9ffd94b23e app: stacked_cores: make 40xx_db.py a quine
in future, invoking this will update the measurements.
2022-09-27 17:08:52 -07:00
3d066d64c6 app: stacked_cores: re-express the database logic
this will make it easier to auto-generate entries
2022-09-27 16:57:34 -07:00
4ee3430db4 app: stacked_cores: simplify the 40xx database 2022-09-27 16:51:45 -07:00
d34e9cc6b2 app: stacked-cores: restructure the 40xx db 2022-09-27 16:41:30 -07:00
049f2d1e4f app: stacked_cores: split the inverter plotting into submodules 2022-09-27 15:59:58 -07:00
8fb4a3be1b app: stacked_cores: define a few more runs 2022-09-27 15:54:11 -07:00
47765f08be app: stacked-cores: document latest run 2022-09-27 02:14:57 -07:00
45f2ecd107 app: stacked_cores: populate results from last run 2022-09-27 01:42:54 -07:00
a3d9b28ab6 capture high-level GPU timing diagnostics
this shows that we spend about 2/3 of the GPU roundtrip time on
stepping. the other 1/3 is, i guess, scheduling/latency/memory transfers.
2022-09-27 00:20:41 -07:00
1387506511 try printing out gpu timesteps
this seems to just print all 0 on my laptop.
maybe it'll work better on a newer GPU.
following the example in Embark's rust-gpu:
runners/wgpu/src/compute.rs
2022-09-26 21:33:38 -07:00
6a7a6bc170 app: stacked_cores: move stimulus_per_step to where it takes effect again 2022-09-26 20:16:03 -07:00
638a97c106 app: stacked-cores: short-circuit sims that have already completed
this takes the per-sim time down from about 30s to 1s for completed
runs. allows much easier stopping + restarting of batched sims.
2022-09-26 16:08:15 -07:00
858c8f4db5 app: stacked-cores: more runs 2022-09-26 15:49:48 -07:00
6d1fb5d614 stim: fix Sinusoid tests 2022-09-25 18:05:48 -07:00
5f289bf07b update rust-toolchain: 2022-04-11 -> 2022-08-29, and update cargo deps 2022-09-25 18:05:17 -07:00
cbd741c6df app: stacked_cores: more asymmetric-winding parameterizations 2022-09-25 16:20:04 -07:00
ebbaf4c697 explore more of 40: multi-wrapped cores 2022-09-24 03:25:32 -07:00
c7ac19dcc9 app: stacked_cores: document more of the 24 (multi-wrapping) runs 2022-09-22 21:12:12 -07:00
6e32f9791e app: stacked_cores: re-analyze 17-xx and 18-xx fork-and-join experiments
neither of these produce good amplification.
- we expect bad amplification for 18-xx because it doesn't do any loops
  with > 2 cores
- 17-xx might just need reduced drive current
2022-09-22 17:25:14 -07:00
26a4a6ea86 app: stacked_cores: explore a few more current parameterizations of the fork-then-join single-clock approach 2022-09-22 17:03:23 -07:00
cd8d374648 app: stacked_cores: try a fork -> join inverter
seems i need to increase the drive current
2022-09-22 02:25:31 -07:00
a0531e5866 app: stacked cores: expand 38 to more initial conditions 2022-09-20 17:37:17 -07:00
79c45359ea app: stacked cores: complete more simulations 2022-09-20 03:02:21 -07:00
92131f95f1 app: stacked cores: add missing (1, 0) sim 2022-09-19 21:31:45 -07:00
58704e4498 app: stacked cores: more experiments around folding multi-cores together 2022-09-19 21:28:16 -07:00
57e9759cab add 24 (multi-winding inverter) to inverter_characteristics.py 2022-09-19 18:24:27 -07:00
e6af37bef4 move crates/{post -> applications/stacked_cores}/scripts 2022-09-19 17:59:05 -07:00
bb737db3f7 plot some datapoints about the stacked_cores inverters 2022-09-19 17:58:26 -07:00
990e71b5c9 app: stacked_cores: more multi-core experiments 2022-09-19 16:02:28 -07:00
5641dc12f1 app: stacked_cores: more experiments around multi-core setups with bias 2022-09-18 17:39:47 -07:00
e8fc3c355f scripts: document the conditions which lead to "stable" logic levels 2022-09-18 17:28:32 -07:00
584fcac841 app: stacked_cores: round the step target to avoid duplicate work across runs 2022-09-16 17:49:01 -07:00
a6e5cb7583 app: stacked_cores: add some more simulations 2022-09-16 17:15:51 -07:00
8917c4a562 app: stacked_cores: define a bunch more sims, especially ones with asymmetric wrappings
this complicates the implementation quite a bit...
i imagine this whole file will be axed someday and treated as a
temporary prototype... we'll see :^)
2022-09-13 00:49:48 -07:00
d7364fe682 region: add a WedgeZ primitive
later i use this to Intersect a torus,
thereby creating a torus which only sweeps out one particular arc angle.
2022-09-13 00:48:32 -07:00
fdbe91b281 Vec2: re-enable the arg method 2022-09-13 00:48:13 -07:00
3e78c1e407 app: stacked_cores: more sims where we use a different loop count to couple S0/S1/S2 2022-09-12 00:32:00 -07:00
265706371d app: stacked_cores: vary the drive strength in the stacked-outside-with-direct-output-locked setup 2022-09-09 15:38:16 -07:00
d42eb32e07 app: stacked_cores: mix Outside coupling with Direct multi-coupling 2022-09-09 01:53:16 -07:00
1a242f05c1 app: stacked_cores: vary the drive current for these Outside Coupling runs 2022-09-08 16:21:26 -07:00
841bc864e9 gitignore: pycache directories 2022-09-08 15:38:56 -07:00
b79c25d2ca app: stacked_cores: define more experiments for the "Outside" flavor of coupling 2022-09-08 15:37:59 -07:00
518ef7df6d app: stacked_cores: new experiment for coupling cores "outside"
that is, directly couple non-adjacent cores, by routing the wires
_outside_ the core stack
2022-09-08 03:21:40 -07:00
6b154367c9 app: stacked_cores: more experiments 2022-09-07 15:53:04 -07:00
6ca6e015d2 app: stacked_cores: DriveStrength: Fold {Hold,Release}{Low,High} into just {Fold,Release} 2022-09-04 23:50:29 -07:00
692bfb2571 app: stacked_cores: convert ClockState states to use constructors 2022-09-04 23:12:38 -07:00
41029d5ad3 app: stacked_cores: add ability to set different drive pulses to different (relative) strengths 2022-09-04 23:00:52 -07:00
86c37a19c8 app: stacked_cores: new experiment where we couple S0,S1,S2 "directly" 2022-09-04 02:33:14 -07:00
5cdedfee41 app: stacked_cores: new experiment where we write S1 -> {S0,S2} and then write *back* to S1 from these 2022-09-04 01:27:54 -07:00
b8878bde1d split stacked_cores_8xx.py 2022-09-04 01:15:41 -07:00
e076480791 post: add script to extract info from the stacked_core 8xx experiments 2022-09-04 00:52:13 -07:00
197591712d app: stacked_cores: try driving 4 cores off of one core
it's actually pretty successful!
2022-09-03 01:06:01 -07:00
ff5882797a app: stacked_cores: add a 3rd core to the mix 2022-09-02 04:35:41 -07:00
a6bfdeb689 app: stacked_cores: vary the windings
we're able to get 90% transfer transmission and 30% noop transmission
when setting this right: a 3x disambiguation with 90% amplitude.
if we solve the amplification problem, we should be in the clear.
2022-09-02 02:48:42 -07:00
f38f06098f app: stacked_cores: implement multiple coupling loops per core 2022-09-01 22:00:42 -07:00
ea3ea63488 ColorTermRenderer: print the measurement name, not just the value 2022-09-01 21:44:38 -07:00
e868f493fb region: add a Rotate operation 2022-09-01 21:39:11 -07:00
216a5d8d76 region: ElongatedTorus: fix accidental doubling of length 2022-09-01 21:38:44 -07:00
491b5d3591 cross: Vec3: implement rotate_{xy,xz,yz} operations 2022-09-01 21:38:06 -07:00
2044397047 app: stacked_cores: prototype
the long term goal of this demo is to see if:
(a) we can get stronger coupling between two cores than with multi-core-inverter.
(b) we can get amplification by using a charge-pump like concept.
(c) we can construct a *working* multi-core-inverter from this.
2022-09-01 18:41:49 -07:00
f737a4c916 region: add an ElongatedTorus type
this is needed for when we want to couple some cores,
but require a narrower space to do so in.
2022-09-01 18:39:32 -07:00
19ff08ada8 app: buffer_proto5: increase fps by setting steps_per_stim
the bottleneck seems mostly to be transferring data CPU <-> GPU.
boosting to 200 steps per stim gets us like 50 fps, but causes serious
CPU-side lag.
2022-09-01 17:41:13 -07:00
482f07520c app: buffer_proto5: fix so DiskCache::get_or_insert flushes 2022-08-31 03:03:13 -07:00
0a1452cb5c buffer_proto5: parallelize a bit more 2022-08-31 02:55:40 -07:00
184997a701 app: buffer_proto5: parallelize the geometry searches 2022-08-31 02:40:47 -07:00
a6fe05a993 app: multi-core-inverter: add more sims 2022-08-31 01:40:29 -07:00
ea07a8d5a6 sim: spirv: tests: cleanup unused imports 2022-08-30 15:43:30 -07:00
ff1342ff8a sim: spirv: get the ray_propagation test working 2022-08-30 15:25:57 -07:00
c050b0406f app: multi-core-inverter: add some more simulations 2022-08-30 01:10:39 -07:00
a45d27a2e9 spirv: test: prototype a ray_propagation test
it needs some more work to be useful
2022-08-28 02:02:06 -07:00
a811561f14 app: multi-core-inverter: more simulations 2022-08-28 02:01:51 -07:00
538db399a9 Stimulus: Sinusoid: remove the amp component 2022-08-28 02:01:33 -07:00
ec77584311 cross: notes about optimization 2022-08-28 02:00:49 -07:00
5dc619ebc9 cross: add Real::c_inv() 2022-08-28 02:00:18 -07:00
19f08fce9f app: multi-core-inverter: tune drive current and conductivity 2022-08-27 03:14:41 -07:00
1be2dc2419 cross: step: test using f64 so we can enforce more precise limits
we're foremost testing the *math*, not the precision under which
it's carried out
2022-08-27 02:41:16 -07:00
8a76b79e17 cross: step: backfill step_e tests 2022-08-27 02:38:50 -07:00
deaaefc3e7 add Real::eps0_inv() convenience method 2022-08-27 02:38:32 -07:00
529ad943ac app: multi-core-inverter: explore a few more simulations 2022-08-27 01:37:23 -07:00
769c90cf9e stim: fix issue where Exp would cause NaNs for very negative t 2022-08-27 01:31:06 -07:00
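the NaN mechanism here is worth spelling out. a hypothetical reconstruction of the bug class (names assumed; the real `Exp` stimulus API differs): for `t` far before the pulse start, the exponent overflows to `+inf`, and multiplying by a zero gate yields `0.0 * inf == NaN`. branching before evaluating the exponential avoids it:

```rust
// Hypothetical reconstruction: gate * exp(...) where the exponential
// overflows to +inf for very negative t, and 0.0 * inf is NaN.
fn exp_pulse_naive(t: f32, t_start: f32, tau: f32) -> f32 {
    let gate = if t >= t_start { 1.0 } else { 0.0 };
    // for t << t_start, -(t - t_start)/tau is huge and exp() -> +inf
    gate * (-(t - t_start) / tau).exp()
}

// Fix: short-circuit before the exponential is ever evaluated.
fn exp_pulse_fixed(t: f32, t_start: f32, tau: f32) -> f32 {
    if t < t_start {
        return 0.0;
    }
    (-(t - t_start) / tau).exp()
}
```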
532055e045 app: multi-core-inverter: define a few more sims
trying to strike the right balance between coupling conductivity and
decay time.
2022-08-26 18:13:50 -07:00
5b66c2bc26 app: multi-core-inverter: parameterize a few more cases
exploring how low control conductivity can go
2022-08-26 04:17:01 -07:00
c90a73d395 cross: step: finish backfilling step_h tests 2022-08-26 02:03:27 -07:00
a7a9a9ea84 cross: step: backfill step_h material-free test 2022-08-26 01:43:10 -07:00
267c9fd36e cross: step: backfill a trivial step_h test 2022-08-26 01:23:52 -07:00
a4008dcc1d cross: step: document the step_h math 2022-08-26 00:41:41 -07:00
64410da4fe cross: step: reconcile the difference between our two \Delta t's 2022-08-25 22:49:50 -07:00
bbd31cc7be cross: step: annotate step_e with a maths derivation
this derivation comes from the legacy cpu code.
it looks like the spirv implementation actually
pursued a conceptually simpler approach.
i'll try to reconcile these shortly.
2022-08-25 22:20:03 -07:00
7f40b3ccd5 app: multi-core-inverter: more experiments with varied conductivities 2022-08-25 19:38:12 -07:00
bf3f79dd50 app: multi-core-inverter: implement some sims which specify conductivities separately 2022-08-25 15:43:13 -07:00
f80b2e22a4 app: multi-core-inverter: allow setting coupling and drive conductivities separately 2022-08-25 15:37:06 -07:00
6afa65c3e4 app: multi-core-inverter: design a few new experiments which vary the conductivity 2022-08-25 15:32:37 -07:00
cc1bdba280 Stimulus: leave a TODO for what to do about Exp & NaN/Inf 2022-08-25 15:32:13 -07:00
10fefdc9a3 step: backfill some documentation 2022-08-24 17:52:46 -07:00
9e49538ba7 cross: split supporting types out of step.rs
this focuses the core "business logic" into a more narrow slice.
good for organization, and also to highlight where the complexity lies
(i.e. most valuable places to test).
2022-08-24 17:45:28 -07:00
1cfef7cac6 stimulus: remove unused Stimulus impl for RegionGated 2022-08-24 15:49:46 -07:00
2c68a53668 rename StimExt to TimeVaryingExt 2022-08-24 15:44:12 -07:00
a2ee85d03f stim: remove the unused Stimulus impl for Shifted 2022-08-24 15:42:37 -07:00
7f089bad45 stim: break apart into smaller modules 2022-08-24 15:27:40 -07:00
9301734fcf app: multi-core-inverter: parameterize the wire conductivity 2022-08-24 15:06:41 -07:00
6d717fdda4 app: multi-core-inverter: explore multiple parameterizations in one run 2022-08-24 02:08:06 -07:00
488d0fe62a sim: spirv: remove apply_stimulus test-only function 2022-08-24 01:51:32 -07:00
dd6f598e44 spirv tests: reorganize 2022-08-24 01:42:10 -07:00
c008fb963e app: multi-core-inverter: explore more of the space 2022-08-24 01:24:16 -07:00
4168b9e4c6 spirv: uncomment some old tests
one of them fails, but it fails 600+ steps into the test.
not sure how problematic this really is, yet.
2022-08-24 00:16:26 -07:00
e94dc49d0f spirv test: mb_ferromagnet tests both e and h fields
we have to lessen the bounds a little bit.
no surprise: *not testing h* caused them to differ substantially.
i'd like to test more strenuously, but that would likely require
enabling R64 on the gpu side.
2022-08-24 00:04:20 -07:00
cdb0c3eaaa leave a TODO: enumerated: make use of List abstractions 2022-08-23 23:51:03 -07:00
ab6496d5f6 backfill Vec2::rotate tests 2022-08-23 23:49:43 -07:00
b16316b75b Driver: remove the unnecessary Boxing of RenderedStimulus
no obvious perf diff one way or the other, yet
2022-08-23 23:33:22 -07:00
4525bbde56 remove unused lazy_static dependency 2022-08-23 23:29:47 -07:00
a51c3a1d14 sim: remove the unused StaticSim type alias 2022-08-23 23:25:08 -07:00
8e48414d68 SimMeta: make the fields private 2022-08-23 23:23:49 -07:00
17446cdc6b sim: remove unused AbstractSim::to_static method 2022-08-23 23:13:15 -07:00
2af754bf29 sim/legacy: remove
that crazy tangle of legacy code evolved over 2+
years into the beast it is today.
but it has no relevance in the GPU-enabled world of today,
particularly one with more rigid Material abstractions.

good things come to an end. i'll try not to be too sentimental.
2022-08-23 23:01:29 -07:00
1891a72df3 spirv tests: no longer test against the legacy simulation:
test the CPU impl against the GPU impl.

it's a different class of test, but it still provides some value and, most
importantly, allows us to strip out the legacy simulation code without
losing *too much* test coverage.
2022-08-23 20:05:30 -07:00
e7ed46bb89 spirv tests: clean up the code layout a bit 2022-08-23 19:23:02 -07:00
c8735ce164 legacy: mark port status of remaining tests 2022-08-23 19:15:44 -07:00
397eaa5a24 sim: port conductor-related legacy tests to spirv 2022-08-23 19:09:32 -07:00
4024ee3316 sim: port legacy test to spirv: sane_boundary_conditions 2022-08-23 16:56:35 -07:00
5c7ce8437a sim: port legacy energy_conservation_over_time test to spirv
the original had a suspect dimension (it was apparently applying a
stimulus *outside* the simulation). i've rectified that, but left a note
to ensure this doesn't happen again...
2022-08-23 16:38:24 -07:00
dcd7079c5f meas: Energy: allow non-meas code to query a simulation's energy
this is especially useful in test
2022-08-23 16:37:31 -07:00
f47c713e0e stimulus: add a structure to sum together two stimuli of different types
this may resemble the original List stuff. only it's more specialized,
for only summing two of a thing.
2022-08-23 16:35:29 -07:00
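the "sum two stimuli of different types" structure can be sketched as a two-field combinator. this is a hypothetical simplification (trait and names assumed; the real `Stimulus` trait takes more parameters):

```rust
// Hypothetical sketch: a combinator summing two stimuli of *different*
// concrete types, with no boxing. Trait reduced to a single method.
trait Stim {
    fn at(&self, t: f32) -> f32;
}

struct Constant(f32);
impl Stim for Constant {
    fn at(&self, _t: f32) -> f32 { self.0 }
}

struct Ramp(f32);
impl Stim for Ramp {
    fn at(&self, t: f32) -> f32 { self.0 * t }
}

// The sum is itself a Stim, so it composes with everything else.
struct Sum2<A, B>(A, B);
impl<A: Stim, B: Stim> Stim for Sum2<A, B> {
    fn at(&self, t: f32) -> f32 {
        self.0.at(t) + self.1.at(t)
    }
}
```

unlike a `Vec<Box<dyn Stim>>`, the concrete `Sum2<A, B>` type is fully known at compile time, so the calls can inline.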
3326acd125 Driver: update TODOs 2022-08-23 01:27:03 -07:00
3c30ac33aa Driver: replace the other ThreadPool with a JobPool
as a bonus we can remove the threadpool dep :-)
2022-08-23 00:03:58 -07:00
7586bc8ff2 JobPool: implement buffering
this does give a little perf bump
2022-08-22 23:38:23 -07:00
a15af4d3a3 JobPool: remove outdated TODO 2022-08-22 20:13:35 -07:00
31726abb99 Driver: stimulus: use our own JobPool abstraction
this one leverages crossbeam.
it does appear to schedule jobs faster than the rust-threadpool.
curiously, the `recv` impl seems to be slower.
maybe that's because of the (inadvertent, unnecessary) switch from mpsc
to mpmc. worth trying to just insert a buffer.
2022-08-22 20:09:59 -07:00
eb95367fa5 JobPool: don't parameterize the type over its Worker
the Worker is often hard to specify concretely at the use site.
2022-08-22 20:05:41 -07:00
284b7368ef add a JobPool type to better abstract over repeat asynchronous work 2022-08-22 19:30:07 -07:00
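the JobPool shape described in these commits can be sketched as N workers pulling boxed jobs off a shared channel and pushing results back. the commits say the real one is built on crossbeam; this hypothetical sketch uses std mpsc so it stays self-contained:

```rust
use std::sync::{mpsc, Arc, Mutex};
use std::thread;

// Hypothetical sketch of a JobPool (names and API assumed, not the
// crate's actual type): repeat asynchronous work over a fixed worker set.
pub struct JobPool<R> {
    jobs: mpsc::Sender<Box<dyn FnOnce() -> R + Send>>,
    results: mpsc::Receiver<R>,
}

impl<R: Send + 'static> JobPool<R> {
    pub fn new(workers: usize) -> Self {
        let (jobs, job_rx) = mpsc::channel::<Box<dyn FnOnce() -> R + Send>>();
        let (result_tx, results) = mpsc::channel();
        let job_rx = Arc::new(Mutex::new(job_rx));
        for _ in 0..workers {
            let job_rx = Arc::clone(&job_rx);
            let result_tx = result_tx.clone();
            thread::spawn(move || loop {
                // the mutex serializes dequeues; the guard is dropped at the
                // end of this statement, so jobs themselves run in parallel
                let job = match job_rx.lock().unwrap().recv() {
                    Ok(job) => job,
                    Err(_) => break, // all senders dropped: shut down
                };
                if result_tx.send(job()).is_err() {
                    break; // pool dropped: discard and exit
                }
            });
        }
        JobPool { jobs, results }
    }

    pub fn submit(&self, job: impl FnOnce() -> R + Send + 'static) {
        self.jobs.send(Box::new(job)).expect("workers exited");
    }

    pub fn recv(&self) -> R {
        self.results.recv().expect("workers exited")
    }
}
```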
5fff872890 app: multi-core-inverter: implement 2-core inverter
this is a simpler test-bed to explore things like clock duration
2022-08-22 18:14:28 -07:00
b160823a81 fix broken cargo build --all 2022-08-22 14:17:51 -07:00
7bce17f56b driver: more precisely measure stimuli "prep"
i'm beginning to think the `threadpool` library is actually just kinda
slow.
2022-08-22 02:32:58 -07:00
4df6e19abe Driver: slightly better instrumentation 2022-08-22 02:17:59 -07:00
532dd26e22 driver: optimization: don't memcopy the RenderedStimulus across threads 2022-08-22 01:49:35 -07:00
8268215441 app: multi-core-inverter: fix non-terminating drive signal 2022-08-22 01:41:20 -07:00
232e0fdafb Stimulus: replace Gated with a type alias 2022-08-22 01:36:47 -07:00
b0bedd1efa Driver: fix a bug where we might step more than the user wanted 2022-08-22 01:23:41 -07:00
82af4b100d driver: optimize the step_multiple step count calculation
this was using a stupid amount of compute.
we still have about 7% time unaccounted for. down from 12-15%.
2022-08-22 01:07:27 -07:00
e5c8bcff95 Driver: remove dead add_classical_boundary_explicit function 2022-08-22 00:51:53 -07:00
ff13a1e96c driver: address a TODO 2022-08-22 00:43:07 -07:00
24b82037b4 Stimulus: parameterize over R.
this saves us from a `mem::transmute` in the sim code to get
`Fields<R>`.
2022-08-22 00:37:34 -07:00
e32d500f8c real: add sin, cos and ln2 functions 2022-08-22 00:37:19 -07:00
f0fc324188 Stimulus: remove unused eval_into trait method 2022-08-21 21:41:27 -07:00
c02e5427d4 spirv tests: port to R32
this gives better debug info
2022-08-21 20:46:57 -07:00
29e78c74fe real: implement std::iter::Sum 2022-08-21 20:46:43 -07:00
527e2746ed driver: TODO about optimization 2022-08-21 19:25:20 -07:00
75a5041ed6 diagnostics: nicer formatting 2022-08-21 19:18:42 -07:00
98d6a5b34f spirv: instrument the device read/write operations 2022-08-21 18:51:51 -07:00
6c9a6e1ffa driver: allow the user to configure the number of steps to go between stimulus application 2022-08-21 18:22:11 -07:00
a414bd77d4 diagnostics: break out a variable to make this code cleaner 2022-08-21 18:12:31 -07:00
850a7e773f diagnostics: rename time_spent_{foo} -> time_{foo} 2022-08-21 18:11:03 -07:00
a38734a1ed diagnostics: instrument the stimulus and stimulus blocked time 2022-08-21 18:10:09 -07:00
18dd66530a driver: evaluate stimulus in a background thread
this boosts fps from 920 to roughly 1150
2022-08-21 16:20:46 -07:00
7b848bcd16 driver: hide more behind the StimAccess type 2022-08-20 19:08:15 -07:00
c5cede6c6e driver: hide the stimulus stuff behind a wrapper
this will make prefetching cleaner to implement
2022-08-20 18:58:31 -07:00
053943df01 add Stimulus::render() and use it inside the driver and SpirvSim 2022-08-20 17:36:23 -07:00
d662ef24d3 SimMeta: implement PartialEq 2022-08-20 17:35:41 -07:00
4f229a51b1 rename RenderedVectorField -> RenderedStimulus 2022-08-20 17:08:26 -07:00
cd2917c8a5 driver, sim: use RenderedVectorField to simplify/optimize sim-internal rendering 2022-08-20 17:07:25 -07:00
69a603920f add a RenderedVectorField. maybe more accurately called a rendered stimulus?
used to represent a stimulus which has been rendered for a specific time with specific simulation parameters.
2022-08-20 17:05:58 -07:00
69ee2070c8 DimSlice: impl Default 2022-08-20 17:04:56 -07:00
ff4209ce78 SpirvSim: remove a Vec copy from the stimulus evaluation
this boosts perf from 562 fps -> 900-ish for the multi-core-inverter
```
t=2.73e-9 frame 141700 fps: 901.93 (sim: 154.0s, stim: 30.6s, [render: 92.7s], blocked: 0.0s, render_prep: 0.7s, other: 2.5s)
```

we're now spending more CPU time rendering the measurements
than computing the stimulus
2022-08-19 04:55:50 -07:00
87c24c739c spirv: call Stimulus::at instead of Stimulus::eval_into
this *lowers* perf from 595 fps -> 562 fps
2022-08-19 04:49:18 -07:00
570917cae5 stim: Gated: no longer a Stimulus 2022-08-19 04:34:20 -07:00
8df001773f eval_into: remove the scale parameter
this actually seems to drop perf from 637 -> 595 ish?

i suppose the compiler was able to fold the time multiplier in with the
scale multiplier? idk, somewhat surprised.
2022-08-19 04:26:58 -07:00
ad5f064584 stim: Simplify the Exp implementation. it's no longer a Stimulus 2022-08-19 04:14:33 -07:00
77124fcdaf driver: implement an optimized stimulus adapter over ModulatedVectorField
this boosts perf from 520fps -> 632fps.

it does some unnecessary clones,
but the bulk of the inefficiency looks to reside inside
the sim/spirv/ code.
it might be that this is nearly memory-bottlenecked.
if so, backgrounding it might be sensible.
2022-08-19 03:54:43 -07:00
9f97f474d7 cross: DimSlice: allow the underlying data to be a Vec 2022-08-19 03:54:01 -07:00
35dbdffda7 driver: lay some scaffolding to allow us to optimize the stimulus in future 2022-08-18 22:19:50 -07:00
ffda00b796 stim: convert CurlStimulus to a CurlVectorField and use ModulatedVectorField
this opens the door to caching the vector field stuff.
2022-08-18 20:47:36 -07:00
478db86b75 multi-core-inverter: remove the List shenanigans 2022-08-18 20:02:09 -07:00
9461cc7781 stim: introduce a VectorField trait which we'll use to build a more structured approach to Stimulus 2022-08-18 17:08:44 -07:00
cf2d21f975 Stimulus: change at method to accept feat_size: f32, loc: Index 2022-08-18 16:21:21 -07:00
0fa8cb0d20 cross: DimSlice: add into_inner method 2022-08-18 16:04:15 -07:00
6750feef8d stim: remove TimeVarying3
`TimeVarying`(1) is enough for what we want.
2022-08-18 15:51:54 -07:00
570f058ee1 rename AbstractStimulus -> Stimulus 2022-08-18 15:27:18 -07:00
60e44d6d4d rename Stimulus -> RegionGated 2022-08-18 15:22:28 -07:00
eb406ea46f UniformStimulus: use Fields internally 2022-08-18 15:19:22 -07:00
6e7ae48d86 stim: remove the extra norm call in CurlStimulus application
we call `with_mag` after, making it redundant.
2022-08-18 14:28:33 -07:00
454307325b stim: add a scale parameter to AbstractStimulus::eval_into
this boosts perf from 571fps -> 620-ish.
2022-08-18 04:33:00 -07:00
e72c0ec11d cross: DimSlice: add dim/offset accessors 2022-08-18 04:32:43 -07:00
fb9d982545 multi-core-inverter: test an alternate Vec-based stimulus
it's about 0.5% slower. not much.
2022-08-18 04:12:57 -07:00
b9581b79b2 sim: add AbstractStimulus::eval_into for bulk evaluation 2022-08-18 04:11:04 -07:00
07fa4042a3 cross: list: add IntoVec trait 2022-08-18 04:00:41 -07:00
a3b15bd7e7 cross: DimSlice: add as_ref, as_mut methods to re-borrow the data with a different lifetime
we can't use the actual AsRef, AsMut trait because we aren't returning a
reference but a new type with an inner reference.
2022-08-18 03:25:52 -07:00
300c11f5ca stim: use a Visitor instead of a FoldOp for eval
boosts perf from 420 -> 530 fps
2022-08-18 02:52:57 -07:00
2a9c065cb0 cross: list: allow visit to be mutable 2022-08-18 02:45:15 -07:00
f2b23ace17 cross: list: implement a Visit trait.
it can't do much yet because of the immutability, but i can fix that.
2022-08-18 02:32:52 -07:00
5cc1c310b5 AbstractStimulus: add bulk eval_into operation. 2022-08-18 01:45:09 -07:00
a0d4a39a66 cross: OffsetDimSlice: implement enumeration 2022-08-17 21:41:26 -07:00
a34363122b cross: OffsetDimSlice: impl indices() enumeration 2022-08-17 21:37:40 -07:00
129aaadeac OffsetDimSlice: impl IntoIter 2022-08-17 21:20:24 -07:00
a247b861e1 cross: hide the iteration features behind a flag
they don't compile on spirv due to the inherent use of Options,
but they'll be useful in the CPU-side code.
2022-08-17 21:14:21 -07:00
198cc16f3f cross: compile tests with the fmt feature so we can use assert_eq 2022-08-17 21:06:23 -07:00
3ed4cd5059 rename DimensionedSlice -> DimSlice 2022-08-17 20:45:36 -07:00
694906aa32 add OffsetDimensionedSlice, with basic indexing operations 2022-08-17 20:43:38 -07:00
ae5bfcf311 rename dim.rs -> dim_slice.rs 2022-08-17 20:34:47 -07:00
7a1e5815a8 rename Vec3uIter -> DimIter 2022-08-17 20:32:31 -07:00
b82c127957 add an enumerated method to DimensionedSlice 2022-08-17 18:12:32 -07:00
2311cf1dd3 cross: DimensionedSlice: add an indices method 2022-08-17 18:04:21 -07:00
782ac4d2c1 cross: impl IntoIterator for DimensionedSlice 2022-08-17 17:41:39 -07:00
1688707aed cross: backfill test for DimensionedSlice::IndexMut 2022-08-17 17:31:11 -07:00
92d4f464e1 cross: DimensionedSlice: backfill Index test 2022-08-17 17:28:31 -07:00
ee98e1a060 stim: re-express the AbstractStimulus list op as a fold
this gives a big perf boost: 10.5 fps -> 446 fps.

still far below the 720 fps we got on an ordinary Vec<Box<dyn
AbstractRegion>>. i think we had achieved 730 using the old
ListVisitor.

it's probably not worth list-ifying the stimuli; at least not at this
level. at the least, we probably want only 5 stimuli: one per core.
if we did that, the stimuli could even have all the same typename,
and be put into a plain old array; no boxing.
2022-08-17 03:28:52 -07:00
ffa13ccd79 app: multi-core-inverter: clean this up by using map/flatten operations 2022-08-17 02:53:14 -07:00
2e667a02dd cross: list: add some conveniences to query list length and access the first element 2022-08-17 02:45:44 -07:00
107a28e7fd app: multi_core_inverter: replace these into_lists with a map operation 2022-08-16 16:53:22 -07:00
7bb3740ce2 app: multi_core_inverter: convert to List primitives
goal will be to replace this with enumerate/map/flatten ops
2022-08-16 16:40:39 -07:00
d45b2042e1 cross: list: add more convenient indexing 2022-08-16 16:40:15 -07:00
4b04a48cc4 cross: list: allow enumerating by u32 instead of Tagged 2022-08-16 16:18:09 -07:00
07b5c855e8 cross: list: implement a Flatten operation 2022-08-16 15:52:09 -07:00
9d4853333d cross: list: implement an Extend operation 2022-08-16 15:28:10 -07:00
c353ce411f cross: list: remove the old (unused) Visitor infrastructure
replaced by Fold/Map/etc
2022-08-16 15:07:47 -07:00
4c5c978053 whitespace nits 2022-08-16 01:29:35 -07:00
fad70f45c1 stim: use Map + Sum for evaluating stimuli lists 2022-08-16 01:11:46 -07:00
e2728e0303 stim: impl Add for Fields to simplify some of this code 2022-08-16 00:17:23 -07:00
a68b3c7a49 cross: list: backfill more extensive Sum tests 2022-08-16 00:08:01 -07:00
57b90faa69 cross: list: implement a Sum operation 2022-08-16 00:03:15 -07:00
a8acf6cbb8 cross: list: extend fold to work by reference. 2022-08-15 23:51:19 -07:00
7704eb623a cross: list: remove special access to Node::{head,tail} 2022-08-15 23:00:49 -07:00
a2939a7807 cross: list: implement Enumerate operation 2022-08-15 21:25:41 -07:00
1cff95877e cross: list: implement a Map operation 2022-08-15 21:00:16 -07:00
96c690990f cross: list: make the fold impl more consistent with reverse 2022-08-15 19:18:05 -07:00
35ceefca42 cross: list: implement a Reverse operation
this was stupidly hard. it HAS to be a trait: it cannot be a
free-standing function else rustc gets stuck in recursive trait
expansion.
2022-08-15 19:14:44 -07:00
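the recursion issue mentioned above can be sketched on a cons-style heterogeneous list (hypothetical types; the crate's `cross::list` machinery is more elaborate). driving the recursion through trait resolution with an accumulator parameter is what lets rustc terminate: each impl peels exactly one element.

```rust
// Hypothetical sketch of a trait-driven Reverse over a heterogeneous
// cons-list. The accumulator-passing helper trait is the key trick.
struct Nil;
struct Cons<H, T>(H, T);

trait ReverseInto<Acc> {
    type Out;
    fn rev(self, acc: Acc) -> Self::Out;
}

// Base case: reversing an empty list yields the accumulator.
impl<Acc> ReverseInto<Acc> for Nil {
    type Out = Acc;
    fn rev(self, acc: Acc) -> Acc { acc }
}

// Recursive case: move the head onto the accumulator, recurse on the tail.
impl<H, T, Acc> ReverseInto<Acc> for Cons<H, T>
where
    T: ReverseInto<Cons<H, Acc>>,
{
    type Out = T::Out;
    fn rev(self, acc: Acc) -> Self::Out {
        self.1.rev(Cons(self.0, acc))
    }
}

// Public entry point: kick off the recursion with an empty accumulator.
trait Reverse {
    type Out;
    fn reverse(self) -> Self::Out;
}
impl<L: ReverseInto<Nil>> Reverse for L {
    type Out = <L as ReverseInto<Nil>>::Out;
    fn reverse(self) -> Self::Out { self.rev(Nil) }
}
```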
7c1824d58c cross: list: document why we use the flat impl 2022-08-15 18:46:44 -07:00
dcbef02765 cross: backfill test for list Appendable::append 2022-08-15 16:52:03 -07:00
a3f2b0b33f cross: list: more tests for fold operation 2022-08-15 03:39:22 -07:00
663d657969 cross: list: implement a Fold operation 2022-08-15 03:33:11 -07:00
1a86fb5ca3 cross: list: fold MaybeMeta and Meta into one trait 2022-08-15 02:32:47 -07:00
22051a39f8 cross: list: s/decl_/impl_/: these are implementations, not declarations 2022-08-14 21:04:52 -07:00
35d0e6a96d cross: list: add an Appendable trait.
this isn't tested... hope it works!
2022-08-14 21:03:49 -07:00
6e9b4465cb impl IntoList for all 100 list types
didn't end up using the Chomp stuff.
2022-08-14 21:01:49 -07:00
2225f98ec8 cross: list: implement IntoList on top of ChompTuple
this should be easier for macro generation of the impl.
2022-08-14 19:52:37 -07:00
19893157fa port: legacy sim accessors test to spirv 2022-08-14 19:16:09 -07:00
f61c0aeb00 spirv_backend: document why we don't support f64 2022-08-14 19:15:58 -07:00
1e994a4feb app: multi_core_inverter: more experiments (clock decay length, drive current, clock length) 2022-08-14 16:24:58 -07:00
f1143ec365 app: multi_core_inverter: don't save meas.csv
this boosts perf by like 5x
2022-08-13 16:20:24 -07:00
ee93c22f4a app: multi_core_inverter: perf: move the stimulus Gating to outside the CurlStimulus
the region.contains() logic is much more expensive than the time bounds
check.
this gets an easy 50% perf boost to the ENTIRE simulation
2022-08-13 15:00:56 -07:00
434dc2cbd5 app: multi_core_inverter: de-virtualize the stimuli
this gets like a 5% perf gain.
there are obviously cleaner ways to do it (e.g. use a ListVisitor),
but this is only a proof of concept.

given the low perf difference, i'll likely revert this or explore other
options.
2022-08-13 03:51:52 -07:00
bbb8b2b9ae driver: better APIs around list-based stimuli 2022-08-13 03:51:01 -07:00
df2ccac1d7 cross: list: implement Indexable up to P99
was previously P4
2022-08-13 03:50:11 -07:00
858e787c19 driver: allow preserving the Stimuli as a concrete List 2022-08-12 18:03:10 -07:00
40449c4165 stim: implement AbstractStimulus for any List of stimuli
note that this doesn't handle zero-length lists (yet).
2022-08-12 17:08:53 -07:00
c2f8acaf24 cross: add a List visitor/walker type 2022-08-12 16:17:26 -07:00
468f5c5225 cross: rename 'VariantHandler' -> 'Visitor' 2022-08-12 15:20:36 -07:00
a74e7fa9a0 Driver: parameterize over the Stimulus type 2022-08-12 14:47:45 -07:00
e141047bec Driver: simplify the StimuliAdapter
it was basically dead code.
2022-08-12 14:42:56 -07:00
31fd83eb34 apps: multi_core_inverter: setup for 4ns clock phases
double what they were before. gives more time for things to settle.
2022-08-12 14:42:17 -07:00
658b8616d7 Torus: correct the cross_section_normal method -- again
i should add integration tests
2022-08-12 02:23:53 -07:00
084c5bc342 Region: remove Clone from the trait, and also parameterize everything
i didn't necessarily *want* to parameterize it all,
but it turned out to be easier to do that than to force all users to
work around the lack of Clone.
2022-08-12 01:42:19 -07:00
d5fbb4e9b2 Region: remove the Serialization requirement 2022-08-12 00:57:01 -07:00
090b1ca09a BUGFIX: Torus: don't normalize the cross section normal
this would have led to incorrectly scaled current measurements
(but not incorrect current generation). we were likely severely
over-estimating the current.
2022-08-12 00:41:25 -07:00
ae1eb861be instrument the stimulus evaluation in our sim
... stimulus evaluation accounts for like 80% of the execution time 🤦
2022-08-11 22:57:43 -07:00
09bc7492ed expose diagnostics into the Sim, and capture stimuli evaluation
this isn't publicly exposed yet.
2022-08-11 22:43:07 -07:00
e7cc78a947 diagnostics: split into their own file 2022-08-11 22:31:05 -07:00
d379a7b0ee app: multi_core_inverter: try a related experiment where S0 is initialized to logic low 2022-08-11 22:24:19 -07:00
aa8f474f52 driver: Diagnostics: clean up the impl a bit 2022-08-11 19:04:12 -07:00
4a33912164 driver: abstract the render time measurements behind a Diagnostics api 2022-08-11 18:58:22 -07:00
f7b72a72be driver: abstract the step diagnostics measurements 2022-08-11 18:41:41 -07:00
a413a4d391 driver: move last_diag_time out of the Diagnostics object 2022-08-11 18:38:36 -07:00
0c9f04981a driver: relegate more diagnostics formatting to the Diagnostics impl 2022-08-11 18:36:35 -07:00
6f1e1557b3 driver: diagnostics: track the actual number of frames stepped
this allows fps-related diagnostics to be meaningful after
serialization / restarts.
2022-08-11 18:29:58 -07:00
e85d38d415 driver: split the Diagnostics out into their own object
more diagnostic refactoring to come
2022-08-11 18:27:30 -07:00
831cbfa76c app: multi_core_inverter: tune the state serializations
less frequent (for less disk space), and also save state
in a recoverable manner
2022-08-11 18:22:55 -07:00
1928ad71cd serializer renderer: gracefully handle the case where we run out of disk space
we might still run out of space when writing CSVs and other outputs...
but that's at least less likely,
as long as we serialize the rendering.
2022-08-11 18:21:46 -07:00
c83a44299f app: multi-core-inverter: fix S4 drive signal specification
there was a spurious high -> low transition
2022-08-11 15:24:26 -07:00
e23ab9efd7 app: multi_core_inverter: try a 5-stage inverter (each stage inverts)
we're diverging from the blog pretty far now.
but it turns out that, because of the inversion in Maxwell's
$\nabla \times E = -\partial B/\partial t$ equation, the trivial wiring actually leads to
natural inverters.
2022-08-11 03:01:08 -07:00
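the sign the commit refers to is Faraday's law, in its standard (not project-specific) form:

```latex
\nabla \times \mathbf{E} = -\frac{\partial \mathbf{B}}{\partial t}
```

a rising flux in the driven core induces an EMF of the opposite sign in a trivially wound coupling loop, so each stage inverts its input for free.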
652621e47a app: multi_core_inverter: more precise clock management
try to control the edges when the clock is released, to prevent ringing.
2022-08-10 16:39:56 -07:00
59a4419130 app: multi_core_inverter: more detailed drive cycle 2022-08-10 15:47:28 -07:00
2f91418095 post: add doc-comments for these tools 2022-08-10 14:28:20 -07:00
46a53a4dde app: multi_core_inverter: fix up the drive sequence
see the code comment for explanation.
2022-08-10 01:43:36 -07:00
3998d72d02 app: multi_core_inverter: drive all four cores for four clock cycles 2022-08-10 01:35:42 -07:00
4fe8be8951 when writing Measurements to a CSV, format them in a machine-readable manner
i haven't tested the ones which contain commas -- hopefully the CSV
encoder deals with these :-)
2022-08-10 01:34:37 -07:00
8a3a64face meas: correctly render SI prefixes for negative numbers
the previous implementation treated negative numbers as effectively
having unknown magnitude, rendering them without any adjustment.
2022-08-10 01:17:49 -07:00
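the fix described here amounts to selecting the prefix from |v| rather than from the signed value. a hypothetical sketch (not the crate's actual formatter, and with an abbreviated prefix table):

```rust
// Hypothetical sketch: choose the SI prefix from the magnitude (abs value),
// so a negative quantity like -2500 Hz gets the same "k" prefix as +2500 Hz
// instead of being rendered with no adjustment at all.
fn si_format(v: f64, unit: &str) -> String {
    // scale thresholds, smallest to largest (abbreviated table)
    let prefixes = [
        (1e-12, "p"), (1e-9, "n"), (1e-6, "u"),
        (1e-3, "m"), (1.0, ""), (1e3, "k"), (1e6, "M"),
    ];
    let mag = v.abs();
    if mag == 0.0 {
        return format!("0{}", unit);
    }
    // pick the largest scale not exceeding the magnitude; fall back to
    // the smallest scale for values below 1e-12
    let (scale, prefix) = prefixes
        .iter()
        .rev()
        .find(|(s, _)| mag >= *s)
        .copied()
        .unwrap_or(prefixes[0]);
    format!("{}{}{}", v / scale, prefix, unit)
}
```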
e08c6dbaa3 stim: backfill tests for CurlStimulus 2022-08-09 22:54:21 -07:00
520e9d9f68 CurlStimulus: re-use the HasCrossSection trait code
i believe this inverts the sign, but it also looks more correct this way
so i'm not immediately correcting that in this patch.
will backfill tests to verify.
2022-08-09 22:14:38 -07:00
1771973c6d CurlStimulus: take axis and center directly from the Region
by taking more from the region, we'll be able to reuse common code
and also make this more testable
2022-08-09 22:10:16 -07:00
7d1ee0ad50 meas: backfill tests for CurrentLoop 2022-08-05 17:35:55 -07:00
06379ffd30 CurrentLoop: use a better justified measurement algorithm
'course the best way to justify it is with tests: hopefully those will
come shortly.
2022-08-01 06:12:16 -07:00
527814e38a convert HasTangent -> HasCrossSection
i believe the current loop algorithm (which i'm just preserving here) is
actually not correct. i'll work through it more.
2022-08-01 05:17:35 -07:00
cc876d72d6 CurrentLoop: factor out the tangent calculation 2022-08-01 00:50:02 -07:00
723fed4786 rename meas::{eval_multiple_kv -> eval_multiple} 2022-07-31 23:27:37 -07:00
0e0945f744 measurement: remove the eval method 2022-07-31 23:26:53 -07:00
d5d8402c3d gitignore: don't ignore vim swap files
have the dev put them somewhere else
2022-07-31 17:15:42 -07:00
5362dacf3a Measurement: don't use SI prefix if there's no unit 2022-07-30 21:21:46 -07:00
b5c58c03ce meas: add a missing unit to the Energy measurement 2022-07-30 21:21:02 -07:00
530ab890e6 meas: render the SI prefix 2022-07-30 21:15:51 -07:00
542d700f69 meas: finish porting to a concrete type.
this will in future let me more easily test each individual measurement
type
2022-07-30 20:56:19 -07:00
60840aec36 WIP: make the measurement type concrete 2022-07-30 20:33:03 -07:00
4361167f99 stim: strongly-type the return type of AbstractSim::at with a Fields struct
this will help me not mix up the E and H fields.
2022-07-30 17:17:17 -07:00
6a511943f7 note a few suspect areas of code 2022-07-30 17:02:10 -07:00
a14625b493 meas: add SI units for some things
this is an uncommon code path, apparently: only visible when rendering
BEFORE serialization. may want to implement a richer meas format.
2022-07-29 23:54:02 -07:00
6f0e35ea35 multi_core_inverter: add some stimuli and measurements 2022-07-29 23:53:44 -07:00
7f3c2a9395 render: transform inaccurate float-based indexing into integer indexing 2022-07-29 21:53:49 -07:00
349e01ba16 fix Vec3::with_mag to return an Option
and thereby simplify it into just one method.
2022-07-29 21:45:25 -07:00
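returning `Option` makes the zero-vector case explicit, since a zero vector has no direction to scale along. a hypothetical sketch (layout and names assumed; the real `Vec3` differs):

```rust
// Hypothetical sketch of with_mag returning Option: rescale a vector to a
// target magnitude, or None for the zero vector (undefined direction).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Vec3 { pub x: f32, pub y: f32, pub z: f32 }

impl Vec3 {
    pub fn mag(self) -> f32 {
        (self.x * self.x + self.y * self.y + self.z * self.z).sqrt()
    }

    pub fn with_mag(self, mag: f32) -> Option<Vec3> {
        let m = self.mag();
        if m == 0.0 {
            return None; // no direction to preserve
        }
        let k = mag / m;
        Some(Vec3 { x: self.x * k, y: self.y * k, z: self.z * k })
    }
}
```

folding the zero check into the return type replaces a pair of methods (or a panic path) with one total function.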
ba6ef3c5c2 viewer: add a render mode to display just the Material 2022-07-29 16:28:58 -07:00
c5e2713b51 remove unused enum_dispatch 2022-07-29 16:07:07 -07:00
9c1fc65068 convert AbstractSim::sample to include a reference to the material -- not just its conductivity 2022-07-29 16:02:16 -07:00
895c87869b rename CellStateWithM => Fields; parameterize Sample over R 2022-07-29 14:55:12 -07:00
7e452f508f AbstractSim: convert the {get,put}_material functions to use only Index 2022-07-29 14:43:59 -07:00
f4ac5de099 viewer: add docs 2022-07-29 13:57:17 -07:00
e2c156e790 meas: Evaluated: fix eval to return both key and value 2022-07-29 13:39:33 -07:00
604f368f0d SerializeRenderer: render to GenericSim, not StaticSim 2022-07-29 13:27:05 -07:00
95213e61be real: add serialization bounds to the Real trait 2022-07-29 13:22:03 -07:00
95ffb73fe3 add a to_generic method to the AbstractSim trait 2022-07-29 13:11:16 -07:00
02920f9bd3 mat: add some missing conversion traits 2022-07-29 13:11:00 -07:00
56f74e6b4a AbstractSim: remove the Send bound 2022-07-28 23:49:45 -07:00
4f2345f608 rename GenericSim -> AbstractSim 2022-07-28 23:41:42 -07:00
3104c06d95 fold MaterialSim into GenericSim trait 2022-07-28 22:31:47 -07:00
71ab89c4c9 de-virtualize GenericSim
this should let us fold the GenericSim and MaterialSim traits together.
2022-07-28 22:22:07 -07:00
2d1a15eabc AbstractMeasurement: remove the DynClone requirement 2022-07-28 21:49:28 -07:00
3722512554 AbstractMeasurement: remove the serde typetag stuff 2022-07-28 21:46:01 -07:00
5c4b8d86f2 measurements: store to disk *after* evaluating them
i'm hoping to simplify a lot of serialization code with this
2022-07-28 21:43:48 -07:00
32d1a70d15 driver: remove dyn_state 2022-07-28 19:19:57 -07:00
6206569f4a Fold SampleableSim and MaterialSim into one 2022-07-28 16:41:32 -07:00
a49d9cd7a4 sim: fold most accessors behind the meta method 2022-07-28 16:25:39 -07:00
0465e3d7f5 sim: remove impulse_e methods 2022-07-28 16:17:02 -07:00
afc1f874d2 sim: remove unused impulse_b, impulse_h methods 2022-07-28 15:55:50 -07:00
1898542372 sim: address a TODO: get_material returns a reference 2022-07-28 15:45:51 -07:00
45d2de29c6 rename 'coremem_types' -> 'coremem_cross' to better reflect its purpose 2022-07-28 15:40:23 -07:00
9e35b29087 move util/ out of coremem and into its only user: buffer_proto5 2022-07-28 15:30:50 -07:00
80d3765bcf fix NaN in CurlStimulus code 2022-07-28 13:54:50 -07:00
62afc91879 .gitignore vim swap files 2022-07-28 13:41:30 -07:00
26efc12c21 multi_core_inverter: abstractions to allow swapping out float impl and backend 2022-07-28 13:20:26 -07:00
de0f3d9654 spirv: document a TODO 2022-07-28 13:18:36 -07:00
07dfb9d852 spirv: add R32 support to the GPU code 2022-07-28 13:18:14 -07:00
15fc7b91dc driver: TODO: split diagnostics into their own struct 2022-07-28 02:06:34 -07:00
c82aab50a2 driver: simplify add_state_file implementation 2022-07-28 02:04:49 -07:00
33cb395584 driver: don't let the state be public 2022-07-28 02:01:46 -07:00
917a3d3c9d buffer_proto5: silence some warnings 2022-07-28 02:00:51 -07:00
fe47eb09f8 driver: rename new_with_state -> new 2022-07-28 01:59:11 -07:00
a6fb21d892 driver: remove SpirvDriver alias 2022-07-28 01:55:22 -07:00
7a6bbf06a5 driver: remove new_spirv method 2022-07-28 01:52:09 -07:00
50af5927df Optional::unwrap: switch this to a debug assert 2022-07-28 01:51:39 -07:00
c36d70044a fix benches/driver.rs
apparently `cargo build --all` doesn't include this :|
2022-07-27 17:26:22 -07:00
5a0766451d spirv: relax some : 'static bounds 2022-07-27 17:11:10 -07:00
9e07189b12 SpirvSim: don't always require the backend during construction 2022-07-27 17:04:43 -07:00
920a0b3c9a spirv backend: remove parameters from WgpuBackend struct 2022-07-27 16:50:51 -07:00
48e8f5d1b4 bench: explicitly specify spirv backend 2022-07-27 16:37:56 -07:00
d5c4e13b84 driver: remove legacy uses 2022-07-27 16:34:50 -07:00
1dd6a068ba replace 'StaticSim' with the SpirvSim type, material being the Vacuum 2022-07-27 16:22:32 -07:00
dc38457a8b don't re-export StaticSim from sim/mod.rs
this way we can clearly spot the legacy users.
2022-07-27 15:42:18 -07:00
93967485f0 spirv: remove the set_meta method on the SimBackend
backend is responsible for procuring its own resources on the first run.
2022-07-27 14:05:17 -07:00
932bb163c3 SpirvSim: explicitly pass the backend in when initialized 2022-07-27 13:53:32 -07:00
7698e0e5ba spirv: re-order the SimBackend parameters to be more consistent 2022-07-27 12:47:51 -07:00
6b51fcea02 spirv: cpu: inline most of this step logic 2022-07-27 12:42:44 -07:00
d0afca7e3f spirv: gpu: simplify some of this entry_point passing 2022-07-27 12:39:15 -07:00
b134fd2373 types: Optional: remove the Into/From<Option> impls
they're no longer used
2022-07-27 12:34:32 -07:00
568d61c598 spirv: remove the Optionality around entry points: compute them statically with traits 2022-07-27 12:32:43 -07:00
baaeeb9463 spirv_backend: no need to re-export glam 2022-07-27 12:13:01 -07:00
4bb0bc09ad spirv/cpu.rs: remove unused import 2022-07-27 12:08:31 -07:00
c85bee20f5 replace some assert's with debug_assert's; slightly more optimal Optional impls 2022-07-27 12:07:30 -07:00
f6a585852e move the dimensioned operations out of the sim adapters and into step.rs 2022-07-26 18:43:41 -07:00
7d16e87b6e spirv: port all backends to use R for the stimulus
particularly, this patches over a difference where the gpu backend
expected the stimulus to be R, while the CPU thought it should be f32.
that would likely have revealed a crash if we had tested it with f64
(TODO).
2022-07-26 18:16:10 -07:00
00dcfb170a spirv_backend/support.rs: remove the re-export of DimensionedSlice
also add some docs
2022-07-26 18:08:03 -07:00
dbd666d272 move the dimensioned indexing out of spirv_backend and into coremem_types
this allows us to use it from the CPU implementation.
2022-07-26 18:03:21 -07:00
d93d14d260 spirv_backend: use RuntimeArray to remove all this UnsizedArray stuff 2022-07-26 15:58:23 -07:00
09f7c8acb9 spirv_backend: support: remove unused helpers 2022-07-26 13:36:42 -07:00
6e4133db4d spirv backend: simplify the adapt.rs indexing by using the constructors previously created 2022-07-26 13:29:39 -07:00
68d8cdde42 move some of the VolumeSample instantiation into step.rs, out of cpu.rs
we can go further: the IndexDim type itself can be moved into step.rs -- maybe?
if it were to wrap a generic flat-indexable thing -- either a slice, or
an array ref.
2022-07-26 01:15:53 -07:00
92ab220110 spirv: test: remove legacy cpu-only tests
these tests are all covered by the backend-agnostic tests
2022-07-26 00:54:33 -07:00
972e0ba4fb spirv: test: add TODO for moving the cpu tests to be backend-agnostic 2022-07-25 22:38:10 -07:00
d68c1b20be spirv: test: port the last rgpu test to be backend-agnostic 2022-07-25 22:37:05 -07:00
a969969449 spirv: test: port mh_ferromagnet tests to backend-agnostic 2022-07-25 22:36:46 -07:00
04c6d05ab0 spirv: test: port mb_ferromagnet tests to be backend-agnostic 2022-07-25 22:28:43 -07:00
dc49cddc97 spirv: test: port conductor tests to backend_agnostic 2022-07-25 22:23:56 -07:00
3fa2c22438 spirv: test: port RngStimulus tests to both backends 2022-07-25 22:19:44 -07:00
a8be7279b3 spirv sim: port rgpu smoke tests to test both Gpu and Cpu backend generically 2022-07-25 21:52:08 -07:00
fee9a1c216 implement a CpuBackend for running the "spirv" simulations
UNTESTED
2022-07-25 17:58:22 -07:00
47e11474d2 parameterize SpirvSim over R: Real 2022-07-25 14:49:32 -07:00
a1784da1cf spirv: parameterize over the SimBackend 2022-07-25 14:27:09 -07:00
b4ee42cfdf spirv: rename WgpuData -> WgpuBackend 2022-07-25 14:11:58 -07:00
cf42ec2dd1 spirv: SimBackend: remove the Array3 use 2022-07-25 14:11:01 -07:00
567f088f98 spirv: hide the gpu ops behind a SimBackend trait 2022-07-25 13:59:28 -07:00
ff1d9867ab parameterize WgpuData over the M type 2022-07-25 13:15:41 -07:00
0801a0dca3 spirv: remove bindings.rs
the one function which was in here previously is just inlined into
gpu.rs
2022-07-25 13:07:35 -07:00
7cf8ed9a7b spirv: gpu.rs no longer references the super SpirvSim type 2022-07-25 13:06:00 -07:00
8c8e707407 spirv: move the stimulus application out of gpu.rs 2022-07-25 12:52:35 -07:00
5b8978f0ec spirv: instantiate the backend in mod.rs, not gpu.rs 2022-07-25 12:47:39 -07:00
bd066331de spirv: fix indendation 2022-07-25 12:45:07 -07:00
cfbd5547cb spirv: move indexable check into the gpu.rs backend 2022-07-25 12:44:02 -07:00
d1765554fc spirv/gpu.rs: don't hard-code Vec3<f32> size
in the future this may become parameterized
2022-07-25 12:36:50 -07:00
38a47a0054 split most of the GPU spirv sim stuff into its own file 2022-07-25 12:29:45 -07:00
2032e90688 spirv_bindings: remove the IntoFfi/FromFfi stuff 2022-07-25 00:55:43 -07:00
8a16a5ce30 lift SimMeta from spirv_backend -> coremem_types 2022-07-25 00:52:11 -07:00
15aaa3e893 move spirv_backend/sim.rs -> coremem_types/step.rs 2022-07-25 00:40:27 -07:00
5fec965549 Optional: derive fmt and serde traits based on feature flag 2022-07-25 00:35:57 -07:00
5490634fe7 move Optional out of spirv_backend and into coremem_types 2022-07-25 00:35:04 -07:00
9c7ef7ec88 spirv_backend: split the array operations out of sim.rs -> adapt.rs 2022-07-25 00:28:03 -07:00
8ab89640f2 spirv_backend: split out some of the spirv entry point adapters into adapt.rs 2022-07-25 00:21:20 -07:00
ebd2762d7a spirv: sim: adjust so Step{E,H}Context does not use ArrayHandle
the specific way to accomplish this is touchy.
see <https://github.com/EmbarkStudios/rust-gpu/issues/312#issuecomment-738824131>:

> So I'd say placing any of the spirv_std::storage_class types into an aggregate (including capturing it in a closure) is unsupported for now

in our specific case, we can't return a tuple where one element is a `&`
to a spirv Input, and another element is a `&mut` to a spirv Output.

when we have a struct, it can enclose either ONLY inputs,
or ONLY outputs -- not a mix.

i'm not 100% on how the Serialized stuff works, since it appears to
violate that. i guess that's exactly what this ArrayHandle stuff
achieves though.
2022-07-24 22:57:41 -07:00
05f5f75dd3 spirv: remove the ArrayHandleMut artifacts in Step{H,E}Context
this will make it easier to reuse these blocks on the CPU side.
2022-07-24 22:17:44 -07:00
b70cafa205 spirv support: fix an overly-constrained lifetime parameter in the array index fn 2022-07-24 21:51:20 -07:00
7286d272b9 move coremem/mat -> coremem/sim/legacy/mat 2022-07-24 18:31:11 -07:00
d0fcd9b657 hide legacy.rs behind a legacy/ dir 2022-07-24 18:20:58 -07:00
2f0e52a09b split SimState out of sim/mod.rs -> sim/legacy.rs 2022-07-24 18:19:26 -07:00
c8a082d2a1 wavefront: port to spirv driver 2022-07-24 17:45:13 -07:00
e62dc495f1 spirv: remove most of the IntoFfi/IntoLib stuff 2022-07-24 01:04:31 -07:00
193df5415f spirv: remove the last nontrivial Material IntoFfi/FromLib 2022-07-24 00:35:47 -07:00
4bd081ca7a spirv bindings: remove From<Static> conversion 2022-07-24 00:14:38 -07:00
940d86d86e remove unnecessary Ffi impls for AdaptStateless 2022-07-24 00:02:00 -07:00
ce00281c09 fix typo: 'electic' -> 'electric' 2022-07-23 23:58:24 -07:00
048eb7dbef geom: don't re-export coremem_types::vec 2022-07-23 18:57:17 -07:00
d813405cb1 spirv bindings: make compatible with more than just strictly f32 2022-07-23 18:24:09 -07:00
3f5160a8ea replace the CPU MBFerromagnet with a generic wrapper around any stateless coremem_types type 2022-07-23 18:10:49 -07:00
d246b97b5e coremem: remove unused SpirvMBFerromagnet 2022-07-23 17:12:30 -07:00
67872de16f clean up some unused code/imports 2022-07-23 16:27:43 -07:00
98773a350c remove custom cpu-specific MBPgram type 2022-07-23 16:26:50 -07:00
35a0c52f67 coremem tests: comment out dead code 2022-07-22 16:25:15 -07:00
9b149bae65 spirv bindings: split out common materials vs cpu materials 2022-07-22 16:24:59 -07:00
4a6a43fb31 plumb the R type parameter through spirv code 2022-07-22 16:21:03 -07:00
ee2cf47b8d types: remove unused code or mark it as intentionally dead 2022-07-22 16:20:22 -07:00
66ccbd1ada spirv: remove mat.rs and reuse coremem_types::mat everywhere 2022-07-22 15:22:09 -07:00
847b95f036 replace the FullyGenericMaterial in spirv with an analog type from coremem_types
this represents the last spirv-specific material.
next steps are to start removing the materials from `coremem` itself
(i.e. re-exporting more from `coremem_types::mat`).
2022-07-22 02:56:00 -07:00
4cbcc46d50 list: flat: remove unused import 2022-07-22 01:54:18 -07:00
727b7b43a3 types: mat: move DiscrMat into its own file 2022-07-22 01:53:42 -07:00
50ae6d4c34 types: mat: hack in a way to implement a 3-variant material 2022-07-22 01:46:07 -07:00
a8b6000104 types: list: lift the generic traits up to the top-level module
this lets me more easily swap in/out different list implementations when
experimenting.
2022-07-22 01:19:43 -07:00
fffe917c5c mat: remove some unused stuff related to DiscrMat2 2022-07-22 01:02:54 -07:00
97ac46fd8a mat: DiscrMat2 uses our Enum type internally 2022-07-22 00:44:57 -07:00
cdcc1fbbdd types: list: remove unused imports 2022-07-22 00:08:53 -07:00
72a66dbff4 types: enum: buff up tests 2022-07-21 23:50:57 -07:00
27c1523b0c types: Enum: make the DiscriminantCodable type a bit more usable
previously we couldn't *create* a discriminant, only edit it.
doing things this way is useful for the material code.
2022-07-21 23:02:33 -07:00
491f863aea types: enumerated: fix compile errors 2022-07-21 22:51:26 -07:00
fcc735765c types: Enum: simplify the internally_discriminanted constructor
we don't actually need to enforce the discriminant codability in the
constructor.
if the conditions aren't met, the user just won't be able to operate on
the enum.
2022-07-21 22:21:35 -07:00
55e58f630c coremem_types: Enum: fix a typo in the DiscriminantCodable docs 2022-07-21 22:15:52 -07:00
f2bb16eb5b coremem_types: Enum: add constructors 2022-07-21 22:11:48 -07:00
19b1df4919 coremem_types: Enum: add tests for the internal discriminant case 2022-07-21 21:58:41 -07:00
72b18d378f coremem_types: Enum: add a method to set the enum to a specific variant 2022-07-21 21:46:33 -07:00
65f90d0654 coremem_types: Enum: implement mutable dispatch 2022-07-21 21:35:15 -07:00
e96f0db11a coremem_types: enum: verify that setting the discriminant works as expected 2022-07-21 21:16:43 -07:00
c889ec6d09 coremem_types: enum: simplify the discriminant trait impls 2022-07-21 21:08:41 -07:00
3541ab14c1 coremem_types: Enum: add a basic test 2022-07-21 20:35:30 -07:00
b8a36c87a6 coremem_types: enum: add helpers to encode a new discriminant
not used yet: i'll have to add a mutator-based dispatcher in order
to set the new variant's value when the discriminant is updated.
2022-07-21 18:40:09 -07:00
fba85c5ae3 coremem_types: list: add a set method 2022-07-21 18:36:11 -07:00
960804598a coremem_types: enums don't require their variant to be Copy
this is safe because the variant is necessarily not ZST.
if it was ZST it could just be stored explicitly instead of folding it
into the first element, and that case should still be OK.
2022-07-21 18:31:31 -07:00
9153dfbb7a coremem_types: enumerated: allow folding the enum into the first element 2022-07-21 18:28:20 -07:00
b15bad434e coremem_types: remove unused ElementAt type alias from list 2022-07-21 17:05:20 -07:00
f448f9200e implement an Enum type
right now the user has to define the method to extract the discriminant
from the enum.
i'll work to integrate the discriminant into the enum itself in future
patches.
2022-07-21 17:02:57 -07:00
d6ebf968b2 coremem_types: list: simplify the IntoList trait 2022-07-21 14:11:42 -07:00
90127e3f02 coremem_types: list: rename tuple_direct -> flat 2022-07-21 14:05:55 -07:00
8ce64ecc73 coremem_types: implement a List type which uses direct indexing, without consuming the whole list in the process
it's ugly, but it works and avoids spirv's ZST bugs.
i can clean it up a bit later (and rename it, since it's not actually
using tuples, directly).
2022-07-21 02:32:21 -07:00
86fb7f018d coremem_types: implement a linked-list based List type
it doesn't quite get us where we want to be, as it requires padding to
handle ZSTs in spirv. i think we HAVE to use a flat list (combined with
a copy method, instead of reference-based) to handle ZSTs without
forced padding.
2022-07-21 02:32:08 -07:00
2e3021b875 coremem_types: move list into a submodule so i can toy with alternate implementations 2022-07-20 23:51:58 -07:00
b555ee93f0 coremem_types: DiscrMat2: implement in terms of compound::list::List
i'm not happy AT ALL with this implementation,
but it's the first way i've found which works around ZST limitations.
2022-07-20 16:20:09 -07:00
d034453970 coremem_types: list: implement core::ops::Index 2022-07-20 14:48:52 -07:00
5b5085829d coremem_types: list: remove the dynamic indexing ops
trying to restructure this stuff specifically to aid the material needs.
2022-07-20 14:39:33 -07:00
7dd4b09faf coremem_types: list: remove unused test helpers 2022-07-20 14:37:54 -07:00
24b0b3d680 coremem_types: list: minor test refactoring 2022-07-20 14:37:08 -07:00
d0ae25e28b coremem_types: list: switch to parameter-based indexing; add apply method 2022-07-20 14:31:20 -07:00
e029c8b3d9 coremem_types: list: make the types public
in the future we can hopefully expose only a subset of the types.
2022-07-20 14:30:43 -07:00
5ddd6fef74 coremem_types: list: rename LLNode -> Node 2022-07-20 13:47:43 -07:00
0ce862b614 coremem_types: list: implement List building via a Chomp trait
it's simpler, and lets the user always do `(args...).into_list()`
on ANY `List<(Args...)>` alias type.
2022-07-20 13:46:17 -07:00
415ffb9c4d coremem_types: list: switch to 'head'/'tail' terminology 2022-07-20 13:10:14 -07:00
dfe27c9b56 coremem_types: list: add a way to construct the full list in one-go 2022-07-20 12:47:20 -07:00
00ae71a6eb coremem_types: add List::set, List::get_mut methods 2022-07-20 12:15:09 -07:00
131 changed files with 31052 additions and 7983 deletions

.gitignore

@@ -1,2 +1,3 @@
 out/
 target/
+__pycache__/

Cargo.lock (generated)

File diff suppressed because it is too large

Cargo.toml

@@ -5,12 +5,13 @@ members = [
     "crates/spirv_backend",
     "crates/spirv_backend_builder",
     "crates/spirv_backend_runner",
-    "crates/types",
+    "crates/cross",
     "crates/post",
     "crates/applications/buffer_proto5",
     "crates/applications/multi_core_inverter",
     "crates/applications/sr_latch",
+    "crates/applications/stacked_cores",
     "crates/applications/wavefront",
 ]

README.md

@@ -5,7 +5,7 @@ to model the evolution of some 3d (or 2d) grid-volume of space over time. simula
 - some material at each position in the grid
 - a set of stimuli to apply at specific regions in the volume over time
-- a set of "measurements" to evaluate and record as the simulation evolves.
+- a set of "measurements" to evaluate and record as the simulation evolves
 - an optional state file to allow pausing/resumption of long-run simulations
 
 after this the simulation is advanced in steps up to some user-specified moment in time.
@@ -19,8 +19,20 @@ examples are in the [crates/applications/](crates/applications/) directory.
 here's an excerpt from the [wavefront](crates/applications/wavefront/src/main.rs) example:
 ```rust
-// Create the simulation "driver" which uses the CPU as backend.
-let mut driver: driver::CpuDriver = driver::Driver::new(size, feature_size);
+// use a general-purpose material, capable of representing vacuum, conductors, and magnetic materials.
+type Mat = mat::GenericMaterial<f32>;
+// simulate a volume of 401x401x1 discrete grid cells.
+let (width, height, depth) = (401, 401, 1);
+let size = Index::new(width, height, depth);
+// each cell represents 1um x 1um x 1um volume.
+let feature_size = 1e-6;
+// create the simulation "driver".
+// the first parameter is the float type to use: f32 for unchecked math, coremem::real::R32
+// to guard against NaN/Inf (useful for debugging).
+// to run this on the gpu instead of the cpu, replace `CpuBackend` with `WgpuBackend`.
+let mut driver = Driver::new(SpirvSim::<f32, Mat, spirv::CpuBackend>::new(size, feature_size));
 
 // create a conductor on the left side.
 let conductor = Cube::new(
@@ -34,17 +46,18 @@ let center_region = Cube::new(
     Index::new(200, height/4, 0).to_meters(feature_size),
     Index::new(201, height*3/4, 1).to_meters(feature_size),
 );
 
 // emit a constant E/H delta over this region for 100 femtoseconds
-let stim = Stimulus::new(
-    center_region,
-    UniformStimulus::new(
-        Vec3::new(2e19, 0.0, 0.0), // E field (per second)
-        Vec3::new(0.0, 0.0, 2e19/376.730) // H field (per second)
-    ).gated(0.0, 100e-15),
+let stim = ModulatedVectorField::new(
+    RegionGated::new(center_region, Fields::new_eh(
+        Vec3::new(2e19, 0.0, 0.0),
+        Vec3::new(0.0, 0.0, 2e19/376.730),
+    )),
+    Pulse::new(0.0, 100e-15),
 );
 driver.add_stimulus(stim);
 
-// finally, run the simulation:
+// finally, run the simulation through t=100ps
 driver.step_until(Seconds(100e-12));
 ```
@@ -62,26 +75,25 @@ which can easily be 30-100x faster:
 ## GPU Acceleration
 
-we use rust-gpu for gpu acceleration. presently, this requires *specific* versions of rust-nightly to work.
+we use [rust-gpu](https://github.com/EmbarkStudios/rust-gpu/) for gpu acceleration.
+presently, this requires *specific* versions of rust-nightly to work.
 the feature is toggled at runtime, but compiled unconditionally. set up the toolchain according to [rust-toolchain.toml](rust-toolchain.toml):
 ```
-$ rustup default nightly-2022-04-11
+$ rustup default nightly-2023-01-21
 $ rustup component add rust-src rustc-dev llvm-tools-preview
 ```
-(it's possible to work with older nightlies like `nightly-2022-01-13` or `nightly-2021-06-08` if you enable the 2020 feature and downgrade whichever packages rustc complains about.)
 
 now you can swap out the `CpuDriver` with a `SpirvDriver` and you're set:
 ```diff
-- let mut driver: driver::CpuDriver = driver::Driver::new(size, feature_size);
-+ let mut driver: driver::SpirvDriver = driver::Driver::new_spirv(size, feature_size);
+- let mut driver = Driver::new(SpirvSim::<f32, Mat, spirv::CpuBackend>::new(size, feature_size));
++ let mut driver = Driver::new(SpirvSim::<f32, Mat, spirv::WgpuBackend>::new(size, feature_size));
 ```
 re-run it as before and you should see the same results:
 ```
-$ cargo run --release --example wavefront
+$ cargo run --release --bin wavefront
 ```
 
 see the "Processing Loop" section below to understand what GPU acceleration entails.
@@ -90,10 +102,10 @@ see the "Processing Loop" section below to understand what GPU acceleration enta
 the [sr\_latch](crates/applications/sr_latch/src/main.rs) example explores a more interesting feature set.
 first, it "measures" a bunch of parameters over different regions of the simulation
-(peak inside [`src/meas.rs`](crates/coremem/src/meas.rs) to see how these each work):
+(peek inside [`meas.rs`](crates/coremem/src/meas.rs) to see how these each work):
 ```rust
-// measure a bunch of items of interest throughout the whole simulation duration:
+// measure some items of interest throughout the whole simulation duration:
 driver.add_measurement(meas::CurrentLoop::new("coupling", coupling_region.clone()));
 driver.add_measurement(meas::Current::new("coupling", coupling_region.clone()));
 driver.add_measurement(meas::CurrentLoop::new("sense", sense_region.clone()));
@@ -121,20 +133,20 @@ allowing you to dig further into the simulation in an _interactive_ way (versus
 renderer used in the `wavefront` example):
 ```rust
-// serialize frames for later viewing with `cargo run -p coremem_post --release --bin viewer`
+// serialize frames for later viewing with `cargo run --release --bin viewer`
 driver.add_serializer_renderer(&*format!("{}frame-", prefix), 36000, None);
 ```
 
 run this, after having setup the GPU pre-requisites:
 ```
-$ cargo run --release --example sr_latch
+$ cargo run --release --bin sr_latch
 ```
 and then investigate the results with
 ```
-$ cargo run -p coremem_post --bin viewer ./out/applications/sr_latch
+$ cargo run --release --bin viewer ./out/applications/sr_latch
 ```
 
 ![screencapture of Viewer for SR latch at t=2.8ns. it shows two rings spaced horizontally, with arrows circulating them](readme_images/sr_latch_EzBxy_2800ps.png "SR latch at t=2.8ns")
@@ -145,35 +157,38 @@ the light blue splotches depict the conductors (in the center, the wire coupling
 what we see here is that both ferrites (the two large circles in the above image) have a clockwise polarized B field. this is in the middle of a transition, so the E fields look a bit chaotic. advance to t=46 ns: the "reset" pulse was applied at t=24ns and had 22ns to settle:
 
-![screencapture of Viewer for SR latch at t=45.7ns. similar to above but with the B field polarized CCW](readme_images/sr_latch_EzBxy_45700ps.png "SR latch at t=45.7ns")
+![screencapture of Viewer for SR latch at t=45.7ns. similar to above but with the B field polarized counter-clockwise](readme_images/sr_latch_EzBxy_45700ps.png "SR latch at t=45.7ns")
 
-we can see the "reset" pulse has polarized both ferrites in the CCW orientation this time. the E field is less pronounced because we gave the system 22ns instead of 3ns to settle this time.
+we can see the "reset" pulse has polarized both ferrites in the counter-clockwise orientation this time. the E field is less pronounced because we gave the system 22ns instead of 3ns to settle this time.
 
-the graphical viewer is helpful for debugging geometries, but the CSV measurements are useful for viewing numeric system performance. peak inside "out/applications/sr-latch/meas.csv" to see a bunch of measurements over time. you can use a tool like Excel or [visidata](https://www.visidata.org/) to plot the interesting ones.
+the graphical viewer is helpful for debugging geometries, but the CSV measurements are useful for viewing numeric system performance. peek inside "out/applications/sr-latch/meas.csv" to see a bunch of measurements over time. you can use a tool like Excel or [visidata](https://www.visidata.org/) to plot the interesting ones.
 
-here's a plot of `M(mem2)` over time from the SR latch simulation. we're measuring, over the torus volume corresponding to the ferrite on the right in the images above, the (average) M component normal to each given cross section of the torus. the notable bumps correspond to these pulses: "set", "reset", "set", "reset", "set+reset applied simultaneously", "set", "set".
+here's a plot of `M(mem2)` over time from the SR latch simulation. we're measuring the (average) M value along the major tangent to the torus corresponding to the ferrite on the right in the images above. the notable bumps correspond to these pulses: "set", "reset", "set", "reset", "set+reset applied simultaneously", "reset", "reset".
 
 ![plot of M(mem2) over time](readme_images/sr_latch_vd_M2.png "plot of M(mem2) over time")
 
 ## Processing Loop (and how GPU acceleration works)
 
-the processing loop for a simulation is roughly as follows ([`src/driver.rs:step_until`](crates/coremem/src/driver.rs) drives this loop):
+the processing loop for a simulation is roughly as follows ([`driver.rs:step_until`](crates/coremem/src/driver.rs) drives this loop):
-1. evaluate all stimuli at the present moment in time; these produce an "externally applied" E and H field
-across the entire volume.
+1. evaluate all stimuli at the present moment in time;
+these produce an "externally applied" E and H field across the entire volume.
 2. apply the FDTD update equations to "step" the E field, and then "step" the H field. these equations take the external stimulus from step 1 into account.
 3. evaluate all the measurement functions over the current state; write these to disk.
 4. serialize the current state to disk so that we can resume from this point later if we choose.
 
-within each step above, the logic is multi-threaded and the rendeveous points lie at the step boundaries.
+within each step above, the logic is multi-threaded and the rendezvous points lie at the step boundaries.
 
-it turns out that the Courant rules force us to evaluate FDTD updates (step 2) on a _far_ smaller time scale than the other steps are sensitive to. so to tune for performance, we apply some optimizations here universally:
+it turns out that the Courant rules force us to evaluate FDTD updates (step 2) on a _far_ smaller time scale than the other steps are sensitive to. so to tune for performance, we apply some optimizations:
-- stimuli (step 1) are evaluated only once every N frames (tunable). we still *apply* them on each frame individually. the waveform resembles that of a Sample & Hold circuit.
+- stimuli (step 1) are evaluated only once every N frames. we still *apply* them on each frame individually. the waveform resembles that of a Sample & Hold circuit.
 - measurement functions (step 3) are triggered only once every M frames.
 - the state is serialized (step 4) only once every Z frames.
+`N`, `M`, and `Z` are all tunable by the application.
 
 as a result, step 2 is actually able to apply the FDTD update functions not just once but up to `min(N, M, Z)` times.
 
-although steps 1 and 3 vary heavily based on the user configuration of the simulation, step 2 can be defined pretty narrowly in code (no user-callbacks/dynamic function calls/etc). this lets us offload the processing of step 2 to a dedicated GPU. by tuning N/M/Z, step 2 becomes the dominant cost in our simulations an GPU offloading can trivially boost performance by more than an order of magnitude on even a mid-range consumer GPU.
+although steps 1 and 3 vary heavily based on the user configuration of the simulation, step 2 can be defined pretty narrowly in code (no user-callbacks/dynamic function calls/etc). this lets us offload the processing of step 2 to a dedicated GPU. by tuning N/M/Z, step 2 becomes the dominant cost in our simulations and GPU offloading can easily boost performance by more than an order of magnitude on even a mid-range consumer GPU.
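the N/M/Z cadence described in the new README text can be sketched in plain Rust. everything below is a hypothetical illustration of the batching arithmetic, not the actual driver (the real loop lives in `crates/coremem/src/driver.rs`):

```rust
// a minimal, self-contained sketch of the step cadence described above.
// stimuli are sampled every N frames, measurements taken every M,
// state serialized every Z; the FDTD update runs on every frame.
fn main() {
    let (n, m, z) = (10u64, 40u64, 200u64); // tunable by the application
    let total_frames = 400u64;
    let (mut frame, mut stim_evals, mut meas_evals, mut saves, mut fdtd_steps) =
        (0u64, 0u64, 0u64, 0u64, 0u64);
    while frame < total_frames {
        stim_evals += 1; // step 1: sample the stimuli once for the whole batch
        // step 2: the backend may batch up to min(N, M, Z) FDTD updates
        // before any of the slower tasks must run again
        let batch = n.min(m).min(z);
        fdtd_steps += batch;
        frame += batch;
        if frame % m == 0 { meas_evals += 1; } // step 3: measurements
        if frame % z == 0 { saves += 1; }      // step 4: serialize state
    }
    // the FDTD equations still ran on all 400 frames, but stimuli were
    // sampled only 40x, measurements taken 10x, and state saved 2x
    println!("fdtd={} stim={} meas={} saves={}", fdtd_steps, stim_evals, meas_evals, saves);
}
```

this is why GPU offloading pays off: only the tight `batch` of step-2 updates needs to run on the device between rendezvous points.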
# Features
@@ -183,76 +198,66 @@ this library takes effort to separate the following from the core/math-heavy "si
 - Measurements
 - Render targets (video, CSV, etc)
 - Materials (conductors, non-linear ferromagnets)
-- Float implementation (for CPU simulations only)
+- Float implementation
 
 the simulation only interacts with these things through a trait interface, such that they're each swappable.
 
-common stimuli type live in [src/stim.rs](crates/coremem/src/stim.rs).
+common stimuli type live in [stim/mod.rs](crates/coremem/src/stim/mod.rs).
-common measurements live in [src/meas.rs](crates/coremem/src/meas.rs).
+common measurements live in [meas.rs](crates/coremem/src/meas.rs).
-common render targets live in [src/render.rs](crates/coremem/src/render.rs). these change infrequently enough that [src/driver.rs](crates/coremem/src/driver.rs) has some specialized helpers for each render backend.
+common render targets live in [render.rs](crates/coremem/src/render.rs). these change infrequently enough that [driver.rs](crates/coremem/src/driver.rs) has some specialized helpers for each render backend.
-common materials are spread throughout [src/mat](crates/coremem/src/mat/mod.rs).
+common materials are spread throughout [mat/mod.rs](crates/cross/src/mat/mod.rs).
-different float implementations live in [src/real.rs](crates/coremem/src/real.rs).
+different float implementations live in [real.rs](crates/cross/src/real.rs).
 if you're getting NaNs, you can run the entire simulation on a checked `R64` type in order to pinpoint the moment those are introduced.
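that swap is just a change of the sim's first type parameter. a sketch, mirroring the wavefront driver line from earlier in this README and assuming `GenericMaterial` is parameterized over `R64` the same way it is over `f32`:

```diff
- type Mat = mat::GenericMaterial<f32>;
- let mut driver = Driver::new(SpirvSim::<f32, Mat, spirv::CpuBackend>::new(size, feature_size));
+ type Mat = mat::GenericMaterial<R64>;
+ let mut driver = Driver::new(SpirvSim::<R64, Mat, spirv::CpuBackend>::new(size, feature_size));
```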
 ## Materials
-of these, the materials have the most "gotchas".
-each cell owns an associated material instance.
-in the original CPU implementation of this library, each cell had a `E` and `H` component,
-and any additional state was required to be held in the material. so a conductor material
-might hold only some immutable `conductivity` parameter, while a ferromagnetic material
-might hold similar immutable material parameters _and also a mutable `M` field_.
-spirv/rust-gpu requires stronger separation of state, and so this `M` field had to be lifted
-completely out of the material. as a result, the material API differs slightly between the CPU
-and spirv backends. as you saw in the examples, that difference doesn't have to appear at the user
-level, but you will see it if you're adding new materials.
-
-### Spirv Materials
-all the materials usable in the spirv backend live in [`crates/spirv_backend/src/mat.rs`](crates/spirv_backend/src/mat.rs).
-to add a new one, implement the `Material` trait in that file on some new type, which must also
-be in that file.
-next, add an analog type somewhere in the main library, like [`src/mat/mh_ferromagnet.rs`](crates/coremem/src/mat/mh_ferromagnet.rs). this will
-be the user-facing material.
-now implement the `IntoFfi` and `IntoLib` traits for this new material inside [`src/sim/spirv/bindings.rs`](crates/coremem/src/sim/spirv/bindings.rs)
-so that the spirv backend can translate between its GPU-side material and your CPU-side/user-facing material.
-finally, because cpu-side `SpirvSim<M>` is parameterized over a material, but the underlying spirv library
-is compiled separately, the spirv library needs specialized dispatch logic for each value of `M` you might want
-to use. add this to [`crates/spirv_backend/src/lib.rs`](crates/spirv_backend/src/lib.rs) (it's about five lines: follow the example of `Iso3R1`).
-
-### CPU Materials
-adding a CPU material is "simpler". just implement the `Material` trait in [`src/mat/mod.rs`](crates/coremem/src/mat/mod.rs).
-either link that material into the `GenericMaterial` type in the same file (if you want to easily
-mix materials within the same simulation), or if that material can handle every cell in your
-simulation then instantiate a `SimState<M>` object which is directly parameterized over your material.
+of these, the materials have the most "gotchas". each cell is modeled as having a vector E, H and M field, as well as a Material type defined by the application.
+the `Material` trait has the following methods (both are optional):
+```
+pub trait Material<R: Real>: Sized {
+    fn conductivity(&self) -> Vec3<R>;
+    /// returns the new M vector for this material. called during each `step_h`.
+    fn move_b_vec(&self, m: Vec3<R>, target_b: Vec3<R>) -> Vec3<R>;
+}
+```
+to add a new material:
+- for `CpuBackend` simulations: just implement this trait on your own type and instantiate a `SpirvSim` specialized over that material instead of `GenericMaterial`.
+- for `WgpuBackend` simulations: do the above and add a spirv entry-point specialized to your material. scroll to the bottom of
+  [crates/spirv_backend/src/lib.rs](crates/spirv_backend/src/lib.rs) and follow the examples.
+to use your new material alongside other materials like `IsomorphicConductor`, leverage the compound type wrappers
+in [`compound.rs`](./crates/cross/src/mat/compound.rs):
+`let my_sim = SpirvSim::<f32, DiscrMat2<IsomorphicConductor<f32>, MyMat>>::new(...)`
+if the compound wrappers seem complicated, it's because they have to be in order to be compatible with SPIR-V, which
+does not generally allow addresses to refer to more than one type.
+hence something like `enum { A(IsomorphicConductor<f32>), B(MyMat) }`
+has to instead be represented in memory like `(u8 /*discriminant*/, IsomorphicConductor<f32>, MyMat)`.
+in practice, we do some extra tricks to fold the discriminant into the other fields and reduce memory usage.
+
+as can be seen, the Material trait is fairly restrictive. its methods are immutable, and it doesn't even have access to the entire cell state (only the cell's M value, during `move_b_vec`). i'd be receptive to a PR or request that exposes more cell state or mutability: this is just an artifact of me tailoring this specifically to the class of materials i intended to use it for.
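the "enum becomes a discriminant-plus-all-payloads struct" layout can be sketched standalone (hypothetical `Conductor`/`Ferromagnet`/`DiscrMat2` stand-ins, not the real `compound.rs` types):

```rust
use std::mem::size_of;

// hypothetical stand-ins for two material payload types
#[derive(Clone, Copy)]
struct Conductor { conductivity: [f32; 3] }
#[derive(Clone, Copy)]
struct Ferromagnet { h_intercept: f32, mu_r: f32, m_sat: f32 }

// what you'd write in ordinary Rust -- but SPIR-V can't let one address
// refer to either of two payload types:
#[allow(dead_code)]
enum MatEnum { A(Conductor), B(Ferromagnet) }

// SPIR-V-compatible layout: storage for *every* variant is always present,
// and an explicit discriminant selects the active one.
struct DiscrMat2 { discr: u8, a: Conductor, b: Ferromagnet }

impl DiscrMat2 {
    fn conductivity(&self) -> [f32; 3] {
        match self.discr {
            0 => self.a.conductivity,
            _ => [0.0; 3], // the ferromagnet variant is non-conductive in this sketch
        }
    }
}
```

the cost is memory: `DiscrMat2` stores both payloads even though only one is active, which is why (per the text above) the real implementation folds the discriminant into spare fields to claw some of that back.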
 ## What's in the Box
 this library ships with the following materials:
-- conductors (Isomorphic or Anisomorphic). supports CPU or GPU.
-- linear magnets (defined by their relative permeability, mu\_r). supports CPU only.
+- conductors (Isomorphic or Anisomorphic).
 - a handful of ferromagnet implementations:
-- `MHPgram` specifies the `M(H)` function as a parallelogram. supports CPU or GPU.
-- `MBPgram` specifies the `M(B)` function as a parallelogram. supports CPU or GPU.
+- `MHPgram` specifies the `M(H)` function as a parallelogram.
+- `MBPgram` specifies the `M(B)` function as a parallelogram.
 - `MHCurve` specifies the `M(H)` function as an arbitrary polygon. requires a new type for each curve for memory reasons (see `Ferroxcube3R1`). supports CPU only.
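a rough sketch of what an `M(H)` parallelogram update might compute, based on my reading of the `MHPgram::new(h_intercept, mu_r, m_sat)` parameters used in the examples (hypothetical; not the library's actual code):

```rust
/// hypothetical M(H) parallelogram: M may sit anywhere between an ascending and a
/// descending branch (that gap is the hysteresis), each branch with slope mu_r - 1
/// and offset horizontally by -/+ h_intercept, all clamped to saturation.
fn mh_pgram_step(m_prev: f32, h: f32, h_intercept: f32, mu_r: f32, m_sat: f32) -> f32 {
    let slope = mu_r - 1.0;
    let lower = slope * (h - h_intercept); // ascending (rightmost) branch
    let upper = slope * (h + h_intercept); // descending (leftmost) branch
    // only push M when it falls outside the parallelogram, then clamp to saturation
    m_prev.max(lower).min(upper).clamp(-m_sat, m_sat)
}
```

with the example parameters `(25.0, 881.33, 44000.0)`, a demagnetized cell stays at `M = 0` for small `|H|` and saturates at `M = 44000` once `H` is driven well past the `H = 25..75` window mentioned in the comments.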
-measurements include ([src/meas.rs](crates/coremem/src/meas.rs)):
+measurements include ([meas.rs](crates/coremem/src/meas.rs)):
 - E, B or H field (mean vector over some region)
 - energy, power (net over some region)
 - current (mean vector over some region)
 - mean current magnitude along a closed loop (toroidal loops only)
 - mean magnetic polarization magnitude along a closed loop (toroidal loops only)
-output targets include ([src/render.rs](crates/coremem/src/render.rs)):
+output targets include ([render.rs](crates/coremem/src/render.rs)):
 - `ColorTermRenderer`: renders 2d-slices in real-time to the terminal.
 - `Y4MRenderer`: outputs 2d-slices to an uncompressed `y4m` video file.
-- `SerializerRenderer`: dumps the full 3d simulation state to disk. parseable after the fact with [src/bin/viewer.rs](crates/post/src/bin/viewer.rs).
+- `SerializerRenderer`: dumps the full 3d simulation state to disk. parseable after the fact with [viewer.rs](crates/post/src/bin/viewer.rs).
 - `CsvRenderer`: dumps the output of all measurements into a `csv` file.
 historically there was also a plotly renderer, but that effort was redirected into developing the viewer tool better.
@@ -266,13 +271,13 @@ in a FDTD simulation, as we shrink the cell size the time step has to shrink too
 this is the "default" optimized version. you could introduce a new material to the simulation, and performance would remain constant. as you finalize your simulation, you can specialize it a bit and compile the GPU code to optimize for your specific material. this can squeeze another factor-of-2 gain: view [buffer\_proto5](crates/applications/buffer_proto5/src/main.rs) to see how that's done.
-contrast that to the CPU-only implementation which achieves 24.6M grid cell steps per second: that's about a 34x gain.
+contrast that to the CPU-only implementation which achieves 24.6M grid cell steps per second on my 12-core Ryzen 3900X: that's about a 34x gain.
 # Support

-the author can be reached on Matrix <@colin:uninsane.org> or Activity Pub <@colin@fed.uninsane.org>. i poured a lot of time into making
-this: i'm happy to spend the marginal extra time to help curious people make use of what i've made, so don't hesitate to reach out.
+the author can be reached on Matrix <@colin:uninsane.org>, email <mailto:colin@uninsane.org> or Activity Pub <@colin@fed.uninsane.org>.
+i'd love for this project to be useful to people besides just myself, so don't hesitate to reach out.

 ## Additional Resources
@@ -293,16 +298,5 @@ David Bennion and Hewitt Crane documented their approach for transforming Diode-
 although i decided not to use PML, i found Steven Johnson's (of FFTW fame) notes to be the best explainer of PML:
 - [Steven Johnson: Notes on Perfectly Matched Layers (PMLs)](https://math.mit.edu/~stevenj/18.369/spring07/pml.pdf)
-a huge thanks to everyone above for sharing the fruits of their studies. though my work here is of a lesser caliber, i hope that someone, likewise, may someday find it of use.
-this project would not have happened if not for literature like the above from which to draw.
-
-## License
-i'm not a lawyer, and i don't want to be.
-by nature of your reading this, my computer has freely shared these bits with yours.
-at this point, it's foolish to think i could do anything to restrict your actions with them, and even more foolish to believe that i have any sort of "right" to do so.
-however, if you somehow believe IP laws are legitimate, then:
-- i claim whatever minimal copyright is necessary for my own use of this code (and future modifications made by me/shared to this repository) to continue unencumbered.
-- i license these works to you according to that same condition and the additional condition that your use of these works does not force me into any additional interactions with legal systems which i would not have made were these works not made available to you (e.g. your license to these works is conditional upon your not filing any lawsuits/patent claims/etc against me).
-do note that the individual dependencies of this software project include licenses of their own. for your convenience, i've annotated each dependency inside [Cargo.toml](Cargo.toml) with its respective license.
+a huge thanks to everyone above for sharing the fruits of their studies.


@@ -1,7 +1,7 @@
 use coremem::{Driver, mat, meas, SpirvDriver};
 use coremem::geom::{Meters, Torus};
 use coremem::sim::units::Seconds;
-use coremem::stim::{CurlStimulus, Sinusoid1, TimeVarying as _};
+use coremem::stim::{CurlStimulus, Sinusoid, TimeVarying as _};

 fn main() {
     coremem::init_logging();
@@ -60,12 +60,12 @@ fn main() {
     driver.set_steps_per_stim(1000);
     //driver.fill_region(&ferro1_region, mat::db::linear_iron());
     // Original, 3R1-LIKE ferromagnet (only a vague likeness), sr-latch-8:
-    // driver.fill_region(&ferro1_region, mat::MBFerromagnet::new(-0.3899, 0.3900, 310_000.0));
-    // driver.fill_region(&ferro2_region, mat::MBFerromagnet::new(-0.3899, 0.3900, 310_000.0));
+    // driver.fill_region(&ferro1_region, mat::MBPgram::new(-0.3899, 0.3900, 310_000.0));
+    // driver.fill_region(&ferro2_region, mat::MBPgram::new(-0.3899, 0.3900, 310_000.0));
     // sr-latch-9; dead spot from B=[-0.03, 0.03]. This will help us see if the math is H-triggered
     // or B-triggered
-    // driver.fill_region(&ferro1_region, mat::MBFerromagnet::new(-0.3300, 0.3900, 310_000.0));
-    // driver.fill_region(&ferro2_region, mat::MBFerromagnet::new(-0.3300, 0.3900, 310_000.0));
+    // driver.fill_region(&ferro1_region, mat::MBPgram::new(-0.3300, 0.3900, 310_000.0));
+    // driver.fill_region(&ferro2_region, mat::MBPgram::new(-0.3300, 0.3900, 310_000.0));
     // mu_r=881.33, starting at H=25 to H=75.
     driver.fill_region(&ferro1_region, mat::MHPgram::new(25.0, 881.33, 44000.0));
     driver.fill_region(&ferro2_region, mat::MHPgram::new(25.0, 881.33, 44000.0));
@@ -89,7 +89,7 @@ fn main() {
     assert!(driver.test_region_filled(&sense_region, mat::IsomorphicConductor::new(sense_conductivity)));
     let mut add_drive_pulse = |region: &Torus, start, duration, amp| {
-        let wave = Sinusoid1::from_wavelength(amp, duration * 2.0)
+        let wave = Sinusoid::from_wavelength(amp, duration * 2.0)
             .half_cycle()
             .shifted(start);
         driver.add_stimulus(CurlStimulus::new(
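the recurring `Sinusoid::from_wavelength(amp, duration * 2.0).half_cycle().shifted(start)` idiom builds one positive half-sine pulse. a standalone sketch of the waveform it plausibly evaluates to (hypothetical function, mirroring each builder call; not coremem's actual implementation):

```rust
use std::f32::consts::PI;

// hypothetical evaluation of from_wavelength(amp, duration * 2.0).half_cycle().shifted(start)
fn half_sine_pulse(t: f32, start: f32, duration: f32, amp: f32) -> f32 {
    let t = t - start;                 // .shifted(start): delay the pulse
    if !(0.0..duration).contains(&t) { // .half_cycle(): nonzero for one half-period only
        return 0.0;
    }
    let wavelength = duration * 2.0;   // .from_wavelength(..): full period is 2x the pulse
    amp * (2.0 * PI * t / wavelength).sin()
}
```

setting the wavelength to twice the pulse duration is what makes the gated window cover exactly the positive half of the sine, so the pulse rises smoothly from 0, peaks at `amp` mid-pulse, and returns smoothly to 0.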


@@ -1,7 +1,7 @@
 use coremem::{Driver, mat, meas, SpirvDriver};
 use coremem::geom::{Meters, Torus};
 use coremem::sim::units::Seconds;
-use coremem::stim::{CurlStimulus, Sinusoid1, TimeVarying as _};
+use coremem::stim::{CurlStimulus, Sinusoid, TimeVarying as _};

 fn main() {
     coremem::init_logging();
@@ -81,7 +81,7 @@ fn main() {
     }
     let mut add_drive_pulse = |region: &Torus, start, duration, amp| {
-        let wave = Sinusoid1::from_wavelength(amp, duration * 2.0)
+        let wave = Sinusoid::from_wavelength(amp, duration * 2.0)
             .half_cycle()
             .shifted(start);
         driver.add_stimulus(CurlStimulus::new(


@@ -5,7 +5,7 @@
 use coremem::{Driver, mat, meas, SpirvDriver};
 use coremem::geom::{Meters, Torus};
 use coremem::sim::units::Seconds;
-use coremem::stim::{CurlStimulus, Sinusoid1, TimeVarying as _};
+use coremem::stim::{CurlStimulus, Sinusoid, TimeVarying as _};

 fn main() {
     coremem::init_logging();
@@ -106,7 +106,7 @@ fn main() {
     }
     let mut add_drive_pulse = |region: &Torus, start, duration, amp| {
-        let wave = Sinusoid1::from_wavelength(amp, duration * 2.0)
+        let wave = Sinusoid::from_wavelength(amp, duration * 2.0)
             .half_cycle()
             .shifted(start);
         driver.add_stimulus(CurlStimulus::new(


@@ -12,7 +12,7 @@
 use coremem::{Driver, mat, meas, SpirvDriver};
 use coremem::geom::{region, Cube, Meters, Spiral, SwapYZ, Torus, Translate, Wrap};
 use coremem::sim::units::Seconds;
-use coremem::stim::{CurlStimulus, Sinusoid1, TimeVarying as _};
+use coremem::stim::{CurlStimulus, Sinusoid, TimeVarying as _};

 fn main() {
@@ -132,7 +132,7 @@ fn main() {
     assert!(driver.test_region_filled(&coupling_region, wire_mat));
     let mut add_drive_pulse = |region: &Torus, start, duration, amp| {
-        let wave = Sinusoid1::from_wavelength(amp, duration * 2.0)
+        let wave = Sinusoid::from_wavelength(amp, duration * 2.0)
             .half_cycle()
             .shifted(start);
         driver.add_stimulus(CurlStimulus::new(


@@ -1,7 +1,7 @@
 use coremem::{Driver, mat, meas, SimState};
 use coremem::geom::{Cube, Index, InvertedRegion, Meters, Torus, Union};
 use coremem::real::R64 as Real;
-use coremem::stim::{CurlStimulus, Sinusoid1, TimeVarying as _};
+use coremem::stim::{CurlStimulus, Sinusoid, TimeVarying as _};
 use coremem::units::Seconds;

 fn main() {
@@ -63,10 +63,10 @@ fn main() {
     // if I = k*sin(w t) then dE/dt = k*w sin(w t) / (A*\sigma)
     // i.e. dE/dt is proportional to I/(A*\sigma), multiplied by w (or, divided by wavelength)
     let peak_stim = peak_current/current_duration / (drive_region.cross_section() * conductivity);
-    let pos_wave = Sinusoid1::from_wavelength(peak_stim as _, current_duration * 2.0)
+    let pos_wave = Sinusoid::from_wavelength(peak_stim as _, current_duration * 2.0)
         .half_cycle();
-    let neg_wave = Sinusoid1::from_wavelength(-peak_stim as _, current_duration * 2.0)
+    let neg_wave = Sinusoid::from_wavelength(-peak_stim as _, current_duration * 2.0)
         .half_cycle()
         .shifted(current_duration + current_break);


@@ -1,6 +1,6 @@
 use coremem::{Driver, mat, meas, SimState, SpirvDriver};
 use coremem::geom::{Index, Meters, Torus};
-use coremem::stim::{CurlStimulus, Sinusoid1, TimeVarying as _};
+use coremem::stim::{CurlStimulus, Sinusoid, TimeVarying as _};
 use coremem::units::Seconds;

 fn main() {
@@ -45,7 +45,7 @@ fn main() {
     let sense1_region = Torus::new_xz(Meters::new(ferro1_center + ferro_major, half_height, half_depth), wire_major, wire_minor);
     //driver.fill_region(&ferro1_region, mat::db::linear_iron());
-    driver.fill_region(&ferro1_region, mat::MBFerromagnet::new(-0.3899, 0.3900, 310_000.0));
+    driver.fill_region(&ferro1_region, mat::MBPgram::new(-0.3899, 0.3900, 310_000.0));
     driver.fill_region(&drive1_region, mat::IsomorphicConductor::new(drive_conductivity));
     driver.fill_region(&sense1_region, mat::IsomorphicConductor::new(sense_conductivity));
@@ -54,7 +54,7 @@ fn main() {
     let drive2_region = Torus::new_xz(Meters::new(ferro2_center - ferro_major, half_height, half_depth), wire_major, wire_minor);
     let sense2_region = Torus::new_xz(Meters::new(ferro2_center + ferro_major, half_height, half_depth), wire_major, wire_minor);
-    driver.fill_region(&ferro2_region, mat::MBFerromagnet::new(-0.3899, 0.3900, 310_000.0));
+    driver.fill_region(&ferro2_region, mat::MBPgram::new(-0.3899, 0.3900, 310_000.0));
     driver.fill_region(&drive2_region, mat::IsomorphicConductor::new(drive_conductivity));
     driver.fill_region(&sense2_region, mat::IsomorphicConductor::new(sense_conductivity));
@@ -64,7 +64,7 @@ fn main() {
     driver.add_classical_boundary(Meters::new(boundary_xy, boundary_xy, boundary_z));
     let mut add_drive_pulse = |region: &Torus, start, duration, amp| {
-        let wave = Sinusoid1::from_wavelength(amp, duration * 2.0)
+        let wave = Sinusoid::from_wavelength(amp, duration * 2.0)
             .half_cycle()
             .shifted(start);
         driver.add_stimulus(CurlStimulus::new(


@@ -5,6 +5,8 @@ authors = ["Colin <colin@uninsane.org>"]
 edition = "2021"

 [dependencies]
+bincode = "1.3" # MIT
 coremem = { path = "../../coremem" }
 log = "0.4"
+rayon = "1.5" # MIT or Apache 2.0
 serde = "1.0"


@@ -0,0 +1,232 @@
use serde::{de::DeserializeOwned, Serialize};
use std::sync::RwLock;
pub struct NoSupplier;
pub type DiskCache<K, V, S=NoSupplier> = DiskCacheImpl<Entries<K, V>, S>;
pub type SyncDiskCache<K, V, S=NoSupplier> = DiskCacheImpl<SyncEntries<K, V>, S>;
pub struct DiskCacheImpl<E, S=NoSupplier> {
path: String,
entries: E,
supplier: S,
}
impl<E: EntriesCap> DiskCacheImpl<E, NoSupplier>
where
E::Key: DeserializeOwned,
E::Value: DeserializeOwned,
{
#[allow(dead_code)]
pub fn new(path: &str) -> Self {
Self::new_with_supplier(path, NoSupplier)
}
}
impl<E: EntriesCap, S> DiskCacheImpl<E, S>
where
E::Key: DeserializeOwned,
E::Value: DeserializeOwned,
{
pub fn new_with_supplier(path: &str, supplier: S) -> Self {
let entries = Self::load_from_disk(path).unwrap_or_default();
Self {
path: path.into(),
entries: E::from_vec(entries),
supplier,
}
}
fn load_from_disk(path: &str) -> Option<Vec<(E::Key, E::Value)>> {
let reader = std::io::BufReader::new(std::fs::File::open(path).ok()?);
bincode::deserialize_from(reader).ok()
}
}
impl<E: EntriesCap, S> DiskCacheImpl<E, S>
where
E::Key: Serialize + Clone,
E::Value: Serialize + Clone,
{
fn flush(&self) {
let writer = std::io::BufWriter::new(std::fs::File::create(&self.path).unwrap());
bincode::serialize_into(writer, &self.entries.to_vec()).unwrap();
}
fn flush_if_inserted(&self, v: GetOrInsert<E::Value>) -> E::Value {
match v {
GetOrInsert::Get(v) => v,
GetOrInsert::Inserted(v) => {
self.flush();
v
},
}
}
}
impl<E: EntriesCap, S> DiskCacheImpl<E, S>
where
E::Key: PartialEq,
E::Value: Clone,
{
#[allow(dead_code)]
pub fn get(&self, k: &E::Key) -> Option<E::Value> {
self.entries.get(k)
}
}
// non-sync insert takes `&mut self`, while sync insert takes `&self`
impl<K: Serialize + Clone + PartialEq, V: Serialize + Clone, S> DiskCache<K, V, S> {
#[allow(dead_code)]
/// insert this k/v ONLY IF NOT PRESENT
pub fn insert(&mut self, k: K, v: V) {
self.entries.insert(k, v);
self.flush();
}
}
impl<K: Serialize + Clone + PartialEq, V: Serialize + Clone, S> SyncDiskCache<K, V, S> {
#[allow(dead_code)]
/// insert this k/v ONLY IF NOT PRESENT
pub fn insert(&self, k: K, v: V) {
self.entries.insert(k, v);
self.flush();
}
}
// non-sync insert takes `&mut self`, while sync insert takes `&self`
impl<K: Serialize + Clone + PartialEq, V: Serialize + Clone, S> DiskCache<K, V, S> {
#[allow(dead_code)]
pub fn get_or_insert_with<F: FnOnce() -> V>(&mut self, k: K, f: F) -> V {
let v = self.entries.get_or_insert_with(k, |_| f());
self.flush_if_inserted(v)
}
}
impl<K: Serialize + Clone + PartialEq, V: Serialize + Clone, S> SyncDiskCache<K, V, S> {
#[allow(dead_code)]
pub fn get_or_insert_with<F: FnOnce() -> V>(&self, k: K, f: F) -> V {
let v = self.entries.get_or_insert_with(k, |_| f());
self.flush_if_inserted(v)
}
}
// non-sync insert takes `&mut self`, while sync insert takes `&self`
impl<K: Serialize + Clone + PartialEq, V: Serialize + Clone, S: FnMut(&K) -> V> DiskCache<K, V, S> {
#[allow(dead_code)]
pub fn get_or_insert_from_supplier(&mut self, k: K) -> V {
let v = self.entries.get_or_insert_with(k, |k| (self.supplier)(k));
self.flush_if_inserted(v)
}
}
impl<K: Serialize + Clone + PartialEq, V: Serialize + Clone, S: Fn(&K) -> V> SyncDiskCache<K, V, S> {
#[allow(dead_code)]
pub fn get_or_insert_from_supplier(&self, k: K) -> V {
let v = self.entries.get_or_insert_with(k, |k| (self.supplier)(k));
self.flush_if_inserted(v)
}
}
enum GetOrInsert<V> {
Get(V),
Inserted(V),
}
//---------- disk cache entries ----------
// we have the non-sync and the sync K/V collections,
// which the DiskCacheImpl wraps.
pub struct Entries<K, V>(Vec<(K, V)>);
pub struct SyncEntries<K, V>(RwLock<Entries<K, V>>);
pub trait EntriesCap {
type Key;
type Value;
fn from_vec(v: Vec<(Self::Key, Self::Value)>) -> Self;
fn to_vec(&self) -> Vec<(Self::Key, Self::Value)>
where
Self::Key: Clone,
Self::Value: Clone;
fn get(&self, k: &Self::Key) -> Option<Self::Value>
where
Self::Key: PartialEq,
Self::Value: Clone;
}
impl<K, V> EntriesCap for Entries<K, V> {
type Key = K;
type Value = V;
fn from_vec(v: Vec<(K, V)>) -> Self {
Self(v)
}
fn to_vec(&self) -> Vec<(K, V)>
where
K: Clone,
V: Clone,
{
self.0.clone()
}
fn get(&self, k: &K) -> Option<V>
where
K: PartialEq,
V: Clone,
{
self.0.iter().find(|(comp_k, _v): &&(K, V)| comp_k == k).map(|(_k, v)| v.clone())
}
}
impl<K, V> EntriesCap for SyncEntries<K, V> {
type Key = K;
type Value = V;
fn from_vec(v: Vec<(K, V)>) -> Self {
Self(RwLock::new(Entries::from_vec(v)))
}
fn to_vec(&self) -> Vec<(K, V)>
where
K: Clone,
V: Clone,
{
self.0.read().unwrap().to_vec()
}
fn get(&self, k: &K) -> Option<V>
where
K: PartialEq,
V: Clone,
{
self.0.read().unwrap().get(k)
}
}
impl<K: PartialEq, V> Entries<K, V> {
fn insert(&mut self, k: K, v: V) {
if !self.0.iter().any(|(comp_k, _v): &(K, V)| comp_k == &k) {
self.0.push((k, v))
}
}
}
impl<K: Clone + PartialEq, V: Clone> Entries<K, V> {
fn get_or_insert_with<F: FnOnce(&K) -> V>(&mut self, k: K, f: F) -> GetOrInsert<V> {
if let Some(v) = self.get(&k) {
return GetOrInsert::Get(v.clone());
}
let v = f(&k);
self.insert(k, v.clone());
GetOrInsert::Inserted(v)
}
}
impl<K: PartialEq, V> SyncEntries<K, V> {
fn insert(&self, k: K, v: V) {
self.0.write().unwrap().insert(k, v)
}
}
impl<K: Clone + PartialEq, V: Clone> SyncEntries<K, V> {
fn get_or_insert_with<F: FnOnce(&K) -> V>(&self, k: K, f: F) -> GetOrInsert<V> {
if let Some(v) = self.get(&k) {
return GetOrInsert::Get(v.clone());
}
let v = f(&k);
self.insert(k, v.clone());
GetOrInsert::Inserted(v)
}
}
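the reason `SyncDiskCache` can insert through `&self` while `DiskCache` needs `&mut self` is the `RwLock` inside `SyncEntries`. a stdlib-only miniature of that pattern (hypothetical `SyncKv`, without the disk-flushing or supplier machinery):

```rust
use std::sync::RwLock;

// minimal stand-in for SyncEntries: interior mutability via RwLock
// lets callers insert through a shared reference.
struct SyncKv<K, V>(RwLock<Vec<(K, V)>>);

impl<K: PartialEq + Clone, V: Clone> SyncKv<K, V> {
    fn new() -> Self {
        Self(RwLock::new(Vec::new()))
    }
    fn get(&self, k: &K) -> Option<V> {
        self.0.read().unwrap().iter()
            .find(|(ck, _)| ck == k)
            .map(|(_, v)| v.clone())
    }
    /// compute and store the value only if the key is absent
    fn get_or_insert_with<F: FnOnce() -> V>(&self, k: K, f: F) -> V {
        if let Some(v) = self.get(&k) {
            return v;
        }
        let v = f();
        self.0.write().unwrap().push((k, v.clone()));
        v
    }
}
```

note that, like the original's read-then-write sequence, this sketch can compute the value twice if two threads race between the `get` and the `push`; a linear scan over a `Vec` also makes lookups O(n), which is fine for the handful of cached simulation results this is used for.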


@@ -2,23 +2,40 @@
 //! to couple them. i parameterize the entire setup over a bunch of different factors in order to
 //! search for the conditions which maximize energy transfer from the one core to the other.
-use coremem::{Driver, mat, meas, SpirvDriver};
-use coremem::geom::{region, Cube, Dilate, Memoize, Meters, Region, Spiral, SwapYZ, Torus, Translate, Wrap};
+use coremem::{Driver, mat, meas};
+use coremem::geom::Meters;
+use coremem::geom::region::{
+    self,
+    Cube,
+    Dilate,
+    Intersection,
+    InvertedRegion,
+    Memoize,
+    Spiral,
+    SwapYZ,
+    Torus,
+    Translate,
+    Wrap
+};
 use coremem::mat::{Ferroxcube3R1MH, IsoConductorOr};
 use coremem::real::{R32, Real as _};
 use coremem::render::CsvRenderer;
-use coremem::stim::{CurlStimulus, Exp1, Gated, Sinusoid1, TimeVarying as _};
-use coremem::sim::units::{Seconds, Frame, Time as _};
-use coremem::sim::spirv;
-use coremem::util::cache::DiskCache;
+use coremem::sim::spirv::{SpirvSim, WgpuBackend};
+use coremem::sim::units::{Seconds, Time as _};
+use coremem::stim::{CurlVectorField, Exp, ModulatedVectorField, Sinusoid, TimeVaryingExt as _};
 use log::{error, info, warn};
+use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
+
+mod cache;
+use cache::SyncDiskCache;

 type Mat = IsoConductorOr<f32, Ferroxcube3R1MH>;

 #[allow(unused)]
 use coremem::geom::{Coord as _, Region as _};
+#[allow(unused)]
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 enum PulseType {
     Square,
@@ -27,6 +44,7 @@ enum PulseType {
     ExpDecay2x,
 }

+#[allow(unused)]
 /// Return just the extrema of some collection
 fn extrema(mut meas: Vec<f32>) -> Vec<f32> {
     let mut i = 0;
@@ -144,31 +162,54 @@ struct Geometries {
     ferro2_region: Torus,
     set1_region: Torus,
     set2_region: Torus,
-    coupling_region: region::Union,
+    coupling_region: region::Union3<
+        Memoize<Dilate<Wrap<Translate<SwapYZ<Intersection<Spiral, Cube>>>>>>,
+        Memoize<Dilate<Wrap<Translate<SwapYZ<Intersection<Spiral, InvertedRegion<Cube>>>>>>>,
+        region::Union3<Cube, Cube, region::Union4<Cube, Cube, Cube, Cube>>
+    >,
     coupling_wire_top: Cube,
     coupling_wire_bot: Cube,
     wrap1_len: f32,
     wrap2_len: f32,
 }
+/// computed measurements which get written to disk for later, manual (or grep-based) analysis.
+/// because we only write these (except for the Debug impl reading them to write to disk),
+/// rustc thinks all the fields are dead.
 #[derive(Clone, Debug, Default)]
 struct Results {
+    #[allow(dead_code)]
     m1_peak: f32,
+    #[allow(dead_code)]
     m2_peak: f32,
+    #[allow(dead_code)]
     m1_stable: f32,
+    #[allow(dead_code)]
     m2_stable: f32,
+    #[allow(dead_code)]
     h1_peak: f32,
+    #[allow(dead_code)]
     h2_max: f32,
+    #[allow(dead_code)]
     h2_min: f32,
+    #[allow(dead_code)]
     h1_stable: f32,
+    #[allow(dead_code)]
     h2_stable: f32,
+    #[allow(dead_code)]
     iset_min: f32,
+    #[allow(dead_code)]
     iset_max: f32,
+    #[allow(dead_code)]
     icoupling_peak: f32,
+    #[allow(dead_code)]
     peak_m_ratio: f32,
+    #[allow(dead_code)]
     stable_m_ratio: f32,
     /// m2_stable divided by m1_peak. i.e. "amplification"
+    #[allow(dead_code)]
     m2_stable_m1_peak: f32,
+    #[allow(dead_code)]
     t: f32,
 }
@@ -261,29 +302,29 @@ fn derive_geometries(p: GeomParams) -> Option<Geometries> {
         wrap2_bot - feat_sizes*2.0,
         wrap2_bot.with_y(coupling_wire_bot.top()) + feat_sizes*2.0,
     );
-    let coupling_stubs = region::Union::new()
-        .with(coupling_stub_top_left.clone())
-        .with(coupling_stub_top_right.clone())
-        .with(coupling_stub_bot_left.clone())
-        .with(coupling_stub_bot_right.clone())
-    ;
+    let coupling_stubs = region::Union::new4(
+        coupling_stub_top_left.clone(),
+        coupling_stub_top_right.clone(),
+        coupling_stub_bot_left.clone(),
+        coupling_stub_bot_right.clone(),
+    );
-    let coupling_wires = region::Union::new()
-        .with(coupling_wire_top.clone())
-        .with(coupling_wire_bot.clone())
-        .with(coupling_stubs.clone())
-    ;
+    let coupling_wires = region::Union::new3(
+        coupling_wire_top.clone(),
+        coupling_wire_bot.clone(),
+        coupling_stubs.clone(),
+    );
-    let coupling_region = region::Union::new()
-        .with(coupling_region1.clone())
-        .with(coupling_region2.clone())
-        .with(coupling_wires.clone())
-    ;
+    let coupling_region = region::Union::new3(
+        coupling_region1.clone(),
+        coupling_region2.clone(),
+        coupling_wires.clone(),
+    );
-    let wrap1_with_coupling = region::union(
+    let wrap1_with_coupling = region::Union::new2(
         coupling_region1.clone(), coupling_wires.clone()
     );
-    let wrap2_with_coupling = region::union(
+    let wrap2_with_coupling = region::Union::new2(
         coupling_region2.clone(), coupling_wires.clone()
     );
@@ -371,9 +412,10 @@ fn run_sim(id: u32, p: Params, g: Geometries) -> Results {
     p.clock_type,
 );
-let mut driver: SpirvDriver<Mat> = Driver::new_spirv(g.dim, p.geom.feat_size);
-driver.set_steps_per_stim(1000);
-if !driver.add_state_file(&*format!("{}/state.bc", prefix), 16000) {
+let mut driver = Driver::new(SpirvSim::<f32, Mat, WgpuBackend>::new(
+    g.dim.to_index(p.geom.feat_size), p.geom.feat_size
+));
+if !driver.add_state_file(&*format!("{}/state.bc", prefix), 4000) {
     // mu_r=881.33, starting at H=25 to H=75.
     let ferro_mat = mat::Ferroxcube3R1MH::new();
     // let ferro_mat = mat::db::conductor(wire_conductivity);
@@ -396,43 +438,39 @@ fn run_sim(id: u32, p: Params, g: Geometries) -> Results {
     info!("loaded state file: skipping geometry calculations");
 }
-let add_drive_sine_pulse = |driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, duration: f32, amp: f32| {
-    let wave = Sinusoid1::from_wavelength(amp, duration * 2.0)
+let add_drive_sine_pulse = |driver: &mut Driver<f32, _, _>, region: &Torus, start: f32, duration: f32, amp: f32| {
+    let wave = Sinusoid::from_wavelength(duration * 2.0)
         .half_cycle()
+        .scaled(amp)
         .shifted(start);
-    driver.add_stimulus(CurlStimulus::new(
-        region.clone(),
-        wave.clone(),
-        region.center(),
-        region.axis()
+    driver.add_stimulus(ModulatedVectorField::new(
+        CurlVectorField::new(region.clone()),
+        wave,
     ));
 };
-let add_drive_square_pulse = |driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, duration: f32, amp: f32| {
-    let wave = Gated::new(amp, start, start+duration);
-    driver.add_stimulus(CurlStimulus::new(
-        region.clone(),
-        wave.clone(),
-        region.center(),
-        region.axis()
+let add_drive_square_pulse = |driver: &mut Driver<f32, _, _>, region: &Torus, start: f32, duration: f32, amp: f32| {
+    let wave = amp.gated(start, start+duration);
+    driver.add_stimulus(ModulatedVectorField::new(
+        CurlVectorField::new(region.clone()),
+        wave,
     ));
 };
-let add_drive_exp_pulse = |driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, duration: f32, amp: f32| {
-    let wave = Exp1::new_at(amp, start, 0.5*duration);
-    driver.add_stimulus(CurlStimulus::new(
-        region.clone(),
-        wave.clone(),
-        region.center(),
-        region.axis()
+let add_drive_exp_pulse = |driver: &mut Driver<f32, _, _>, region: &Torus, start: f32, duration: f32, amp: f32| {
+    let wave = Exp::new_at(amp, start, 0.5*duration);
+    driver.add_stimulus(ModulatedVectorField::new(
+        CurlVectorField::new(region.clone()),
+        wave,
     ));
 };
-let add_drive_step = |driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, amp: f32| {
-    add_drive_square_pulse(driver, region, start, 1.0, amp);
+// step function: "permanently" increase the current by `amp`.
+let _add_drive_step = |driver: &mut Driver<f32, _, _>, region: &Torus, start: f32, amp: f32| {
+    add_drive_square_pulse(driver, region, start, 1.0 /* effectively infinite duration */, amp);
 };
-let add_drive_pulse = |ty: PulseType, driver: &mut SpirvDriver<Mat>, region: &Torus, start: f32, duration: f32, amp: f32| {
+let add_drive_pulse = |ty: PulseType, driver: &mut Driver<f32, _, _>, region: &Torus, start: f32, duration: f32, amp: f32| {
     match ty {
         PulseType::Square => add_drive_square_pulse(driver, region, start, duration, amp),
         PulseType::Sine => add_drive_sine_pulse(driver, region, start, duration, amp),
@@ -490,8 +528,9 @@ fn run_sim(id: u32, p: Params, g: Geometries) -> Results {
 driver.add_csv_renderer(&*meas_csv, 400, None);
 driver.add_csv_renderer(&*meas_sparse_csv, 8000, None);
+driver.set_steps_per_stimulus(20);
 driver.step_until(duration);
 let (m1_peak, m1_stable) = significa(CsvRenderer::new(&*meas_sparse_csv).read_column_as_f32("M(mem1)"));
 let (m2_peak, m2_stable) = significa(CsvRenderer::new(&*meas_sparse_csv).read_column_as_f32("M(mem2)"));
 let (h1_peak, h1_stable) = significa(CsvRenderer::new(&*meas_sparse_csv).read_column_as_f32("H(mem1)"));
@@ -636,7 +675,7 @@ fn main() {
     variants.len() / post_times.len(),
 );
-let mut geom_cache = DiskCache::new_with_supplier(
+let geom_cache = SyncDiskCache::new_with_supplier(
     &format!("{}/.geom_cache", ensure_out_dir(i)),
     |geom: &GeomParams| derive_geometries(geom.clone())
 );
@@ -682,7 +721,7 @@ fn main() {
 };
 let wraps1_choices: Vec<_> = (-120..120)
-    .into_iter()
+    .into_par_iter()
     .filter_map(|wraps1| {
         let params = GeomParams {
             wraps1: (wraps1 * 4) as f32,
@@ -707,7 +746,7 @@ fn main() {
     .0.wraps1;
 let wraps2_choices: Vec<_> = (-120..120)
-    .into_iter()
+    .into_par_iter()
     .filter_map(|wraps2| {
         let params = GeomParams {
             wraps2: (wraps2 * 4) as f32,

File diff suppressed because it is too large


@@ -1,20 +1,20 @@
-/// this example creates a "set/reset" latch from a non-linear ferromagnetic device.
-/// this is quite a bit like a "core memory" device.
-/// the SR latch in this example is wired to a downstream latch, mostly to show that it's
-/// possible to transfer the state (with some limitation) from one latch to another.
+//! this example creates a "set/reset" latch from a non-linear ferromagnetic device.
+//! this is quite a bit like a "core memory" device.
+//! the SR latch in this example is wired to a downstream latch, mostly to show that it's
+//! possible to transfer the state (with some limitation) from one latch to another.
-use coremem::{Driver, mat, meas, SpirvDriver};
-use coremem::geom::{Meters, Torus};
-use coremem::sim::spirv;
+use coremem::{Driver, mat, meas};
+use coremem::geom::{Coord as _, Meters, Torus};
+use coremem::sim::spirv::{SpirvSim, WgpuBackend};
 use coremem::sim::units::Seconds;
-use coremem::stim::{CurlStimulus, Sinusoid1, TimeVarying as _};
+use coremem::stim::{CurlVectorField, ModulatedVectorField, Sinusoid, TimeVaryingExt as _};
 fn main() {
     coremem::init_logging();
     // feature size: the side-length of each discrete grid cell to model (in Meters)
     let feat_size = 20e-6f32;
     // parameters used below to describe the components we construct below. units are (M, A or S).
     let depth = 1600e-6;
     // closest distance between the non-vacuum component and the dissipating boundary
@@ -59,7 +59,9 @@ fn main() {
 let coupling_region = Torus::new_xz(Meters::new(0.5*(ferro1_center + ferro2_center), ferro_center_y, half_depth), wire_coupling_major, wire_minor);
 let sense_region = Torus::new_xz(Meters::new(ferro2_center + ferro_major, ferro_center_y, half_depth), wire_major, wire_minor);
-let mut driver: SpirvDriver<spirv::FullyGenericMaterial> = Driver::new_spirv(Meters::new(width, height, depth), feat_size);
+let mut driver = Driver::new(SpirvSim::<f32, mat::GenericMaterial<f32>, WgpuBackend>::new(
+    Meters::new(width, height, depth).to_index(feat_size), feat_size
+));
 // mu_r=881.33, starting at H=25 to H=75.
 driver.fill_region(&ferro1_region, mat::MHPgram::new(25.0, 881.33, 44000.0));
@@ -76,14 +78,13 @@ fn main() {
 // helper to schedule a stimulus at the provided start time/duration.
 let mut add_drive_pulse = |region: &Torus, start, duration, amp| {
-    let wave = Sinusoid1::from_wavelength(amp, duration * 2.0)
+    let wave = Sinusoid::from_wavelength(duration * 2.0)
         .half_cycle()
+        .scaled(amp)
         .shifted(start);
-    driver.add_stimulus(CurlStimulus::new(
-        region.clone(),
-        wave.clone(),
-        region.center(),
-        region.axis()
+    driver.add_stimulus(ModulatedVectorField::new(
+        CurlVectorField::new(region.clone()),
+        wave,
     ));
 };
@@ -143,8 +144,6 @@ fn main() {
 // render a couple CSV files: one very detailed and the other more sparsely detailed
 driver.add_csv_renderer(&*format!("{}meas.csv", prefix), 200, None);
 driver.add_csv_renderer(&*format!("{}meas-sparse.csv", prefix), 1600, None);
-// how frequently to re-evaluate the stimulus (Sample & Hold interpolation between evaluations)
-driver.set_steps_per_stim(1000);
 driver.step_until(Seconds(duration));
 }


@@ -0,0 +1,9 @@
[package]
name = "stacked_cores"
version = "0.1.0"
authors = ["Colin <colin@uninsane.org>"]
edition = "2021"
[dependencies]
coremem = { path = "../../coremem" }
log = "0.4"


@@ -0,0 +1,10 @@
#!/usr/bin/env python3
from stacked_cores_40xx_db import *
sims = [(p, c.logically_inverted()) for (p, c) in filter_meas(run="41")]
sims.sort(key=lambda c: c[1].get(1.0))
for (params, curve) in sims[:20]:
viable = curve.is_viable_inverter()
print(f"{params}: {curve.get(1.0):.3}, {curve.get(0.0):.3}, inv?: {viable}")


@@ -0,0 +1,117 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 51-xx or later demos
to extract higher-level info from them.
"""
import os
import sys
import re
from natsort import natsorted
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
from stacked_cores_39xx import extract_polarity
class MeasRow:
def __init__(self, t_sec: float, m: list):
self.t_sec = t_sec
self.m = m
def __repr__(self) -> str:
m = ", ".join(f"{v:6}" for v in self.m)
return f"MeasRow({self.t_sec}, [{m}])"
@staticmethod
def from_dict(row_data: dict) -> 'MeasRow':
t_sec = row_data["time"]
m = [int(m + 0.5) for m in extract_m(row_data)]
return MeasRow(t_sec, m)
def format_float_tuple(t: tuple) -> str:
formatted_elems = [f"{e:= 05.3f}," for e in t]
return f"({' '.join(formatted_elems)})"
def format_list(l: list) -> str:
if len(l) == 0: return "[]"
if len(l) == 1: return f"{l}"
formatted_elems = [f" {e}," for e in l]
return "\n".join(["["] + formatted_elems + ["]"])
def indented(s: str) -> str:
return s.replace('\n', '\n ')
class ParameterizedMeas:
def __init__(self, meas = None):
self.meas = meas or {}
def add_meas(self, params: tuple, meas_rows: list):
self.meas[tuple(params)] = meas_rows
def all_rows(self) -> list:
# this is just `sum(self.meas.values())` but python is an idiot
rows = []
for mrows in self.meas.values():
rows.extend(mrows)
return rows
def runs(self) -> list:
return self.meas.values()
def num_runs(self) -> int:
return len(self.meas)
def __repr__(self) -> str:
meas_entries = "\n".join(
f" {format_float_tuple(k)}: {indented(format_list(v))}," for (k, v) in natsorted(self.meas.items())
)
return f"ParameterizedMeas({{\n{meas_entries}\n}})"
def extract_rows(path: str, times: list) -> list:
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
meas_rows = []
for t in times:
row = last_row_before_t(rows, t)
if not row: return None
meas_rows.append(MeasRow.from_dict(row))
# validate the sim has run to completion
if meas_rows[-1].t_sec < 0.95 * t: return None
meas_rows[-1].t_sec = t # make pretty
return meas_rows
def parse_param(s: str) -> float:
""" parse a parameter in the form of 'p050' or 'n0015' or '000' """
if s == "000":
return 0.0
sign = {'n': -1, 'p': 1}[s[0]]
mag = int(s[1:])
max_mag = 10**(len(s[1:]) - 1)
return sign * mag / max_mag
def extract_params(pstr: str) -> list:
""" extract parameters from a string like -n100-000 """
pieces = [p for p in pstr.split("-") if p]
return [parse_param(p) for p in pieces]
def extract_parameterized_meas(stem: str, times: list) -> ParameterizedMeas:
""" given some stem, parse all parameterized measurements associated with that stem """
base_dir, prefix = os.path.split(stem)
built = ParameterizedMeas()
for entry in os.listdir(base_dir):
if entry.startswith(prefix):
meas_rows = extract_rows(os.path.join(base_dir, entry, "meas.csv"), times)
if not meas_rows: continue
params = extract_params(entry[len(prefix):])
built.add_meas(params, meas_rows)
return built
if __name__ == "__main__":
print(extract_parameterized_meas(sys.argv[1], [float(f) for f in sys.argv[2:]]))
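As a quick sanity check on the parameter-string grammar above ('p050' ↔ 0.5, 'n0015' ↔ -0.015, '000' ↔ 0.0), the parsing logic can be exercised standalone; this is a local copy of `parse_param`/`extract_params` from the script above:

```python
def parse_param(s: str) -> float:
    """ parse a parameter in the form of 'p050' or 'n0015' or '000' """
    if s == "000":
        return 0.0
    sign = {'n': -1, 'p': 1}[s[0]]
    mag = int(s[1:])
    # the digit count encodes the scale: 'p050' -> 50/100, 'n0015' -> -15/1000
    max_mag = 10**(len(s[1:]) - 1)
    return sign * mag / max_mag

def extract_params(pstr: str) -> list:
    """ extract parameters from a string like -n100-000 """
    pieces = [p for p in pstr.split("-") if p]
    return [parse_param(p) for p in pieces]

print(parse_param("p050"))          # 0.5
print(parse_param("n0015"))         # -0.015
print(extract_params("-n100-000"))  # [-1.0, 0.0]
```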

View File

@@ -0,0 +1,89 @@
from inverter_characteristics import Piecewise
# stable inverter (ideal)
fwd_fake_step = Piecewise(
[
[ 0.0, 0.0 ],
[ 0.4, 0.0 ],
[ 0.6, 1.0 ],
[ 1.0, 1.0 ],
]
)
# stable inverter (amplifying)
fwd_fake_1_5x = Piecewise(
[
[ 0.0, 0.0 ],
[ 0.65, 1.0 ],
[ 1.0, 1.0 ],
]
)
# stable inverter (amplifying only from 0.3 -> 0.5)
fwd_fake_slope_change_before_0_5 = Piecewise(
[
[ 0.0, 0.2 ],
[ 0.3, 0.3 ],
[ 0.5, 0.6 ],
[ 1.0, 1.0 ],
]
)
# failed inverter (>1.0 slope happens too late)
# flipping x doesn't fix.
# however, shifting x by -0.1 and y by -0.2 and *then* inverting x does.
# - this gives us a concave-up 1-x like curve
fwd_fake_slope_change_after_0_5 = Piecewise(
[
[ 0.0, 0.2 ],
[ 0.3, 0.3 ],
[ 0.6, 0.5 ],
[ 1.0, 1.0 ],
]
)
slope_fake_hill = [
0.8, 0.9, 1.0, 1.1, 1.2, 1.2, 1.1, 1.0, 0.9, 0.8
]
fwd_fake_hill = Piecewise(
[ (0.1*i, 0.1 * sum(slope_fake_hill[0:i])) for i in range(11) ]
)
fwd_fake_asymmetric_hill = Piecewise(
[
(0.0, 0.20),
(0.2, 0.30),
(0.4, 0.45),
(0.6, 0.75),
(0.8, 0.80),
(1.0, 0.85),
]
)
# valid inverter; the [0.6, 1.0] -> 0.8 mapping *fixes* the logic low value to
# 1.0 - 0.8 = 0.2
# and allows anything 0.6 to 1.0 to be recognized as logic high immediately.
# i.e. "bottoming out" is a *good* thing
fwd_fake_asymmetric_flats = Piecewise(
[
(0.0, 0.20),
(0.2, 0.30),
(0.6, 0.80),
(1.0, 0.80),
]
)
fwd_fake_asymmetric_overdrive = Piecewise(
[
(0.0, 0.40),
(0.3, 0.50),
(0.6, 0.85),
(1.0, 0.90),
]
)
fwd_fake_asymmetric_bottom_out = Piecewise(
[
(0.0, 0.00),
(0.8, 0.99),
(1.0, 1.00),
]
)


@@ -0,0 +1,181 @@
import plotly.express as px
from pandas import DataFrame
from math import floor
def extrema(arr):
arr = list(arr)
return min(arr), max(arr)
class Line:
def __init__(self, from_, to):
self.from_ = from_
self.to = to
def slope(self) -> float:
return (self.to[1] - self.from_[1]) / (self.to[0] - self.from_[0])
def get(self, x: float) -> float:
from_x, from_y = self.from_
to_x, to_y = self.to
if x == from_x:
return from_y
tween = (x - from_x) / (to_x - from_x)
return tween * to_y + (1-tween) * from_y
class Piecewise:
def __init__(self, xy: list):
""" xy is a list of (x, y) pairs """
self.xy = list(xy)
@property
def num_pieces(self) -> int:
return len(self.xy) - 1
def normalized(self, prev_max: float) -> 'Piecewise':
""" map every coordinate from [-prev_max, prev_max] to [0, 1] """
p = prev_max
r = 2*prev_max
s = 1.0/r
return Piecewise([
(s*(x + p), s*(y + p)) for (x, y) in self.xy
])
def inverted_y(self) -> 'Piecewise':
return Piecewise([
(x, -y) for (x, y) in self.xy
])
def inverted_x(self) -> 'Piecewise':
return Piecewise([
(-x, y) for (x, y) in self.xy
][::-1])
def inverted_xy(self) -> 'Piecewise':
return self.inverted_x().inverted_y()
def logically_inverted(self) -> 'Piecewise':
""" return a Piecewise that evaluates to 1-y """
return self.inverted_y().shifted_y(1.0)
def logically_inverted_x(self) -> 'Piecewise':
return self.inverted_x().shifted_x(1.0)
def logically_inverted_xy(self) -> 'Piecewise':
return self.logically_inverted_x().logically_inverted()
def shifted_x(self, shift: float) -> 'Piecewise':
return Piecewise([
(x + shift, y) for (x, y) in self.xy
])
def shifted_y(self, shift: float) -> 'Piecewise':
return Piecewise([
(x, y + shift) for (x, y) in self.xy
])
def scaled_y(self, scale: float) -> 'Piecewise':
return Piecewise([
(x, y * scale) for (x, y) in self.xy
])
def clipped(self, min_y: float = 0.00, max_y: float = 1.0) -> 'Piecewise':
return Piecewise([
(x, min(max_y, max(min_y, y))) for (x, y) in self.xy
])
def flat_extrapolation(self) -> 'Piecewise':
""" make it so f(x) for x OOB returns the nearest in-bounds y """
first = self.xy[0]
last = self.xy[-1]
new_first = (first[0] - 1.0, first[1])
new_last = (last[0] + 1.0, last[1])
return Piecewise([new_first] + self.xy + [new_last])
def cascaded(self, second: 'Piecewise') -> 'Piecewise':
""" return a function equivalent to y = second(self(x)) """
b_min, b_max = extrema(x for (x, y) in self.xy)
bounds = [ int(floor(b_min * 100)), 1 + int(floor(b_max * 100)) ]
return Piecewise([
(0.01 * x, second.get(self.get(0.01 * x))) for x in range(*bounds)
])
def line_for(self, x: float) -> Line:
for first_lower in self.xy[:-1][::-1]:
if first_lower[0] < x: break
for first_upper in self.xy[1:]:
if not first_upper[0] < x: break
return Line(first_lower, first_upper)
def is_viable_inverter(self) -> bool:
res = 1000
for i in range(res+1):
x = i/res
n = (i+1)/res
if self.get(self.get(x)) < x and self.get(self.get(n)) > n:
return True
return False
def get(self, x: float) -> float:
"""
evaluate the piecewise function at the provided x value.
OOB points are just extrapolated from the nearest piece.
"""
return self.line_for(x).get(x)
def get_slope(self, x: float) -> float:
return self.line_for(x).slope()
def get_repeated(self, x: float, n: int = 255) -> float:
for _ in range(n):
x = self.get(x)
return x
def get_mean(self, x: float, n: int = 100) -> float:
return 1/n * sum(self.get(x * i/(n-1)) for i in range(n))
def get_integral(self, x: float, n: int = 100) -> float:
return self.get_mean(x) * x
def get_range(self, xmin: float = 0.0, xmax: float = 1.0) -> float:
return abs(self.get(xmax) - self.get(xmin))
def df_for(self, from_: float, to: float, points: int, f) -> DataFrame:
x_step = (to - from_) / (points - 1)
x = [from_ + x_step*x for x in range(points)]
y = [f(xi) for xi in x]
return DataFrame(data=dict(x=x, y=y))
def df(self, from_: float = 0.0, to: float = 1.0, points: int = 101) -> DataFrame:
return self.df_for(from_, to, points, self.get)
def slope_df(self, from_: float = 0.0, to: float = 1.0, points: int = 101) -> DataFrame:
return self.df_for(from_, to, points, self.get_slope)
def min_max_slope(self):
slope = [self.get_slope(0.01*x) for x in range(101)]
return min(slope), max(slope)
def max_abs_slope(self) -> float:
return max(abs(s) for s in self.min_max_slope())
def plot_for(self, from_: float, to: float, title: str, f):
df = self.df_for(from_, to, points=101, f=f)
fig = px.line(df, x="x", y="y", title=title)
fig.show()
def plot(self, from_: float = 0.0, to: float = 1.0, title: str = "Piecewise"):
self.plot_for(from_, to, title, self.get)
def plot_slope(self, from_: float = 0.0, to: float = 1.0, title: str = "Piecewise"):
self.plot_for(from_, to, title, self.get_slope)
def plot_equilibrium(self, from_: float = 0.0, to: float = 1.0, title: str = "Piecewise"):
self.plot_for(from_, to, title, self.get_repeated)
def plot_integral(self, from_: float = 0.0, to: float = 1.0, title: str = "Piecewise"):
self.plot_for(from_, to, title, self.get_integral)


@@ -0,0 +1,242 @@
#!/usr/bin/env python3
"""
try to understand which transfer characteristics can be used to create stable logic.
"""
def fwd(offset: float, amp: float):
return lambda x: min(1.0, offset + amp*x)
def inv(offset: float, amp: float):
return lambda x: max(0.0, 1.0 - offset - amp*x)
def inv_from_fwd(inv):
return lambda x: max(0.0, 1 - inv(x))
def test_stability(fwd, inv):
low = 0.0
high = 1.0
mid = 0.5
for i in range(8):
low, high, mid = inv(fwd(high)), inv(fwd(low)), inv(fwd(mid))
print(f"low {low:.2f} high {high:.2f} bistable {mid:.2f}")
def map_stability(inv):
s = []
for i in range(101):
v = i/100.0
for _ in range(32):
v = inv(v)
s.append(v)
logic_low = s[0]
logic_high = s[100]
logic_mean = 0.5*(logic_low + logic_high)
print(f"low: {logic_low:.2f}, high: {logic_high:.2f}")
for i, v in enumerate(s):
if v >= logic_mean:
print("logic cutoff: {:.2f}".format(i/100))
break
def print_to_stable(inv, f):
for _ in range(8):
next = inv(f)
print(f"{f} -> {next}")
f = next
def print_to_stable_noise(inv, f, noise=0.01):
for i in range(8):
if i%2: x = f - noise
else: x = f + noise
next = inv(x)
print(f"{f:.3} ({x:.3}) -> {next:.3}")
f = next
print("stability: 0.2 + 2.0*x")
test_stability(fwd(0.2, 2.0), inv(0.2, 2.0))
print("stability: 0.2 + 1.5*x")
test_stability(fwd(0.2, 1.5), inv(0.2, 1.5))
print("stability: 0.2 + 1.1*x")
test_stability(fwd(0.2, 1.1), inv(0.2, 1.1))
print("stability: 0.4 + 1.1*x")
test_stability(fwd(0.4, 1.1), inv(0.4, 1.1))
print("stability: 0.5 + 2.0*x")
test_stability(fwd(0.5, 2.0), inv(0.5, 2.0))
print("stability: 0.9*x")
test_stability(fwd(0.0, 0.9), inv(0.0, 0.9))
print("stability: 0.2 + 1.5*x; 0.9 - 0.7*x")
test_stability(fwd(0.2, 1.5), inv(0.1, 0.7))
print("stability: 0.1 + 1.3*x; 0.9 - 0.7*x")
test_stability(fwd(0.1, 1.3), inv(0.1, 0.7))
print("""\
offset isn't a deal-breaker until it approaches 50%.
for any offset < 0.5, amplification > 1.0, there is *some* stable pair of levels.
as offset increases, the stable pairs become closer together
""")
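The claim above — any offset < 0.5 with amplification > 1.0 admits *some* stable pair of levels — can be spot-checked with a minimal standalone iteration (local copies of `fwd`/`inv` at offset=0.2, amp=1.5, mirroring `test_stability`):

```python
def fwd(offset: float, amp: float):
    return lambda x: min(1.0, offset + amp*x)

def inv(offset: float, amp: float):
    return lambda x: max(0.0, 1.0 - offset - amp*x)

f, g = fwd(0.2, 1.5), inv(0.2, 1.5)
low, high = 0.0, 1.0
for _ in range(8):
    # one round trip through the fwd/inv pair
    low, high = g(f(high)), g(f(low))
# the levels settle to a distinct stable pair (0.0 and ~0.5) rather than collapsing
print(f"low {low:.2f} high {high:.2f}")
```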
def test_stability_inv(inv, mid=0.5):
low = 0.0
high = 1.0
for i in range(32):
low, high, mid = inv(high), inv(low), inv(mid)
print(f"low {low:.2f} high {high:.2f} bistable {mid:.2f}")
print("stability_inv: 0.2 + 2.0*x")
test_stability_inv(inv(0.2, 2.0))
print("stability_inv: 0.2 + 1.1*x")
test_stability_inv(inv(0.2, 1.1))
print("stability_inv: 0.5 + 1.1*x")
test_stability_inv(inv(0.5, 1.1), 0.2)
print("stability_inv: 0.7 + 1.1*x")
test_stability_inv(inv(0.7, 1.1), 0.1)
print("""\
inverter-only circuits can be stable, even if they ordinarily bias to just one direction...
the same amp > 1.0 condition holds.
importantly, offset > 0.5 becomes *fine*
""")
def piecewise(points: list, scale=20000.0):
"""
each element in points is a two-tuple (input, output), sorted by input value.
the return value is a function which:
- accepts f: [0..1]
- maps that to [-scale..scale]
- maps that through `points` with linear interpolation
- scales back to [0..1] and return that
"""
def apply(f):
x_in_scale = -scale + f * scale * 2.0
# locate the first point smaller than the input
for first_lower in points[:-1][::-1]:
if first_lower[0] < x_in_scale: break
for first_higher in points[1:]:
if first_higher[0] > x_in_scale: break
# print(x_in_scale, first_lower, first_higher)
tween = (x_in_scale - first_lower[0]) / (first_higher[0] - first_lower[0])
y_in_scale = tween * first_higher[1] + (1-tween) * first_lower[1]
r = (y_in_scale + scale) / (2.0 * scale)
return max(0.0, min(1.0, r))
return apply
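Before feeding in measured M values, it is worth confirming that the scale/interpolate/unscale round trip in `piecewise` is lossless: with an identity transfer curve, `apply` should return its input unchanged. A standalone copy of the factory:

```python
def piecewise(points: list, scale=20000.0):
    """ map f in [0..1] to [-scale..scale], interpolate through `points`, scale back """
    def apply(f):
        x_in_scale = -scale + f * scale * 2.0
        # locate the pair of points bracketing the input
        for first_lower in points[:-1][::-1]:
            if first_lower[0] < x_in_scale: break
        for first_higher in points[1:]:
            if first_higher[0] > x_in_scale: break
        tween = (x_in_scale - first_lower[0]) / (first_higher[0] - first_lower[0])
        y_in_scale = tween * first_higher[1] + (1-tween) * first_lower[1]
        return max(0.0, min(1.0, (y_in_scale + scale) / (2.0 * scale)))
    return apply

identity = piecewise([[-20000, -20000], [20000, 20000]])
print(identity(0.25))  # 0.25: the round trip is exact
```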
fwd_26 = piecewise(
[
[ -14687, -7326 ],
[ -13049, -6503 ],
[ -11785, -5833 ],
[ -4649, -1447 ],
[ 4961, 7059 ],
[ 11283, 11147 ],
],
17000
)
print("stability 26 7:1 windings (SUITABLY DIFFERENTIATED)")
test_stability(fwd_26, lambda x: 1-x)
map_stability(inv_from_fwd(fwd_26))
fwd_36 = piecewise(
[
[ (-13430 + -14112 + -13935)/3, -8297],
[ (-12796 + -13454 + -13293)/3, -7684],
[ (-4872 + -5106 + -5091)/3, -282],
[ (2322 + 2343 + 3705)/3, 7411],
[ (4854 + 4840 + 7273)/3, 9318],
[ (7324 + 7138 + 10608)/3, 10151],
[ (10552 + 10509 + 14412)/3, 11398],
[ (13418 + 13482 + 14760)/3, 13081],
[ (14196 + 14528 + 14533)/3, 13580],
], 15000)
print("stability 36 (3:1) cores (not suitably differentiated)")
test_stability(fwd_36, lambda x: 1-x)
fwd_38_2_0 = piecewise(
[
[ (-13745 + -13012)/2, -6222 ],
[ (-4969 + -4744)/2, 2373 ],
[ (1772 + 2070)/2, 10467 ],
[ (4472 + 4114)/2, 12921 ],
[ (7221 + 6291)/2, 14530 ],
[ (11159 + 10397)/2, 15865 ],
[ (12430 + 15653)/2, 16202 ],
], 17000
)
print("stability 38 2:0 cores (SUITABLY DIFFERENTIATED)")
test_stability(fwd_38_2_0, lambda x: 1-x)
map_stability(inv_from_fwd(fwd_38_2_0))
# print_to_stable(inv_from_fwd(fwd_38_2_0), 0.0)
# print_to_stable_noise(inv_from_fwd(fwd_38_2_0), 0.32, 0.01)
fwd_38_3_0 = piecewise(
[
[ (-13956 + -13890 + -13077)/3, -5203],
[ (-4979 + -4885 + -4717)/3, 5051],
[ (1531 + 503 + 1006)/3, 12509],
[ (4180 + 1821 + 2239)/3, 14386],
[ (6986 + 3436 + 3701)/3, 15451],
[ (10482 + 6644 + 7735)/3, 16081],
[ (11436 + 13343 + 14411)/3, 16380],
], 17000
)
print("stability 38 3:0 cores (SUITABLY DIFFERENTIATED)")
test_stability(fwd_38_3_0, lambda x: 1-x)
map_stability(inv_from_fwd(fwd_38_3_0))
# print_to_stable(inv_from_fwd(fwd_38_3_0), 0.0)
# print_to_stable_noise(inv_from_fwd(fwd_38_3_0), 0.29, 0.01)
# fwd_38_2_0 minus 8000
# de-biasing like this makes for a WORSE inverter
# fwd_38_2_0_offset = piecewise(
# [
# [ (-13745 + -13012)/2, -14222 ],
# [ (-4969 + -4744)/2, -5627 ],
# [ (1772 + 2070)/2, 2467 ],
# [ (4472 + 4114)/2, 4921 ],
# [ (7221 + 6291)/2, 6530 ],
# [ (11159 + 10397)/2, 7865 ],
# [ (12430 + 15653)/2, 8202 ],
# ], 17000
# )
# print("stability 38 2:0 cores (offset -8000)")
# print("pw(0) = ", fwd_38_2_0_offset(0))
# test_stability(fwd_38_2_0_offset, lambda x: 1-x)
# map_stability(inv_from_fwd(fwd_38_2_0_offset))
# print_to_stable(inv_from_fwd(fwd_38_2_0_offset), 0.0)
fwd_38_4_2 = piecewise([
[ (-14049 + -14191 + -14218 + -14161)/4, -10675],
[ (-4993 + -4944 + -4985 + -5123)/4, -4640],
[ (1948 + 1854 + 2633 + 2602)/4, 73],
[ (4823 + 3914 + 5799 + 6177)/4, 1651],
[ (7933 + 6731 + 10058 + 9404)/4, 2625],
[ (11420 + 11947 + 15968 + 14039)/4, 6413],
[ (13786 + 16465 + 16667 + 15144)/4, 8180],
], 17000)
print("stability 38 4:2 cores (not suitably differentiated)")
test_stability(fwd_38_4_2, lambda x: 1-x)
fwd_38_5_2 = piecewise([
[ (-14056 + -14195 + -14234 + -14224 + -14162)/5, -8395],
[ (-4990 + -4931 + -4935 + -4968 + -5080)/5, -1468],
[ (1804 + 966 + 1564 + 1808 + 2237)/5, 3661],
[ (4632 + 2427 + 3521 + 4500 + 5563)/5, 5238],
[ (7629 + 4568 + 6648 + 8011 + 8527)/5, 6710],
[ (10758 + 9552 + 12404 + 13748 + 12927)/5, 10892],
[ (12335 + 15650 + 16603 + 16403 + 14635)/5, 13151],
], 17000)
print("stability 38 5:2 cores (not suitably differentiated)")
test_stability(fwd_38_5_2, lambda x: 1-x)
# TODO: code 24, 26, 27, 28, 30 (asymmetric windings)


@@ -0,0 +1,18 @@
#!/usr/bin/env python3
from natsort import natsorted
from stacked_cores_52xx import *
from stacked_cores_52xx_plotters import *
or_gates = read_db(lambda name: name.startswith("52-"))
sweep_a0 = lambda a1, points=101: [(unit_to_m(x/(points-1)), a1, None, None) for x in range(points)]
sweep_a1 = lambda a0, points=101: [(a0, unit_to_m(x/(points-1)), None, None) for x in range(points)]
for name, meas in natsorted(or_gates.items()):
trace = eval_series(meas, sweep_a1(-17000), extract_52xx_tx)
plot(f"{name}", "a1", trace)
plot_slope(f"slope {name}", "a1", trace)


@@ -0,0 +1,32 @@
#!/usr/bin/env python3
from stacked_cores_52xx import *
from stacked_cores_52xx_plotters import *
def extract_53xx_tx(meas_rows: list) -> tuple:
"""
extracts a flat tuple of input/output M mappings from a 53xx run
"""
return (meas_rows[1].m[0], meas_rows[0].m[1], meas_rows[0].m[2], meas_rows[1].m[3])
sweep_buf_inputs = lambda offset=0, points=101: [(None, m, -m + offset, None) for m in sweep_1d(points)]
sweep_m1 = lambda m2, points=101: [(None, m, m2, None) for m in sweep_1d(points)]
buf_gates = read_db(lambda name: name.startswith("53-buf-no_inp_couple-"))
for name, meas in natsorted(buf_gates.items()):
print(name)
# normal M2 = -M1 sweep
trace = eval_series(meas, sweep_buf_inputs(points=41), extract_53xx_tx, y_idx=0)
plot(name, "a0", trace)
plot_slope(f"slope {name}", "a0", trace)
# M2 = 0.25 - M1 shifted sweep
# trace = eval_series(meas, sweep_buf_inputs(8500), extract_53xx_tx, y_idx=0)
# plot(f"In=0.25-Ip {name}", "a0", trace)
# plot_slope(f"slope In=0.25-Ip {name}", "a0", trace)
# M2 fixed at 0.0 while M1 sweeps
# trace = eval_series(meas, sweep_m1(0.0), extract_53xx_tx, y_idx=0)
# plot(f"In=0 {name}", "a0", trace)
# plot_slope(f"slope In=0 {name}", "a0", trace)


@@ -0,0 +1,24 @@
#!/usr/bin/env python3
from natsort import natsorted
from stacked_cores_52xx import *
from stacked_cores_52xx_plotters import *
def extract_54xx_tx(meas_rows: list) -> tuple:
"""
extracts a flat tuple of input/output M mappings from a 54xx run
"""
return (meas_rows[0].m[0], meas_rows[2].m[0])
split_gates = read_db(lambda name: name.startswith("54-"))
sweep_input = lambda points=101: [( unit_to_m(x/(points-1)), None ) for x in range(points)]
for name, meas in natsorted(split_gates.items()):
trace = eval_series(meas, sweep_input(), extract_54xx_tx)
plot(f"{name}", "a1", trace)
plot_slope(f"slope {name}", "a1", trace)


@@ -0,0 +1,24 @@
#!/usr/bin/env python3
from natsort import natsorted
from stacked_cores_52xx import *
from stacked_cores_52xx_plotters import *
def extract_55xx_tx(meas_rows: list) -> tuple:
"""
extracts a flat tuple of input/output M mappings from a 55xx run
"""
return (meas_rows[0].m[1], meas_rows[2].m[1])
split_gates = read_db(lambda name: name.startswith("55-"))
sweep_input = lambda points=101: [( unit_to_m(x/(points-1)), None ) for x in range(points)]
for name, meas in natsorted(split_gates.items()):
trace = eval_series(meas, sweep_input(), extract_55xx_tx)
plot(f"{name}", "a1", trace)
plot_slope(f"slope {name}", "a1", trace)


@@ -0,0 +1,32 @@
#!/usr/bin/env python3
from natsort import natsorted
from stacked_cores_52xx import *
from stacked_cores_52xx_plotters import *
def extract_56xx_tx(meas_rows: list) -> tuple:
"""
extracts a flat tuple of input/output M mappings from a 56xx run
"""
return (
meas_rows[0].m[0], # input
meas_rows[0].m[1], # input
meas_rows[1].m[2], # output
meas_rows[1].m[3], # output
meas_rows[0].m[4], # input
meas_rows[0].m[5], # input
)
buf_gates = read_db(lambda name: name.startswith("56-"))
sweep_buf_inputs = lambda points=101: [(m, m, None, None, -m, -m) for m in sweep_1d(points)]
for name, meas in natsorted(buf_gates.items()):
trace = eval_series(meas, sweep_buf_inputs(), extract_56xx_tx, y_idx=2)
plot(f"{name}", "a1", trace)
plot_slope(f"slope {name}", "a1", trace)


@@ -0,0 +1,37 @@
#!/usr/bin/env python3
from natsort import natsorted
from stacked_cores_52xx import *
from stacked_cores_52xx_plotters import *
def extract_57xx_tx(meas_rows: list) -> tuple:
"""
extracts a flat tuple of input/output M mappings from a 57xx run
"""
return (
meas_rows[0].m[0], # input
meas_rows[1].m[1], # output
meas_rows[0].m[2], # input
meas_rows[0].m[3], # input
meas_rows[1].m[4], # output
meas_rows[0].m[5], # input
)
# buf_gates = read_db(lambda name: name.startswith("57-"))
buf_gates = read_db(lambda name: name.startswith("57-buf-1p-2n-"))
sweep_buf_inputs = lambda points=101: [(m, None, m, -m, None, -m) for m in sweep_1d(points)]
sweep_pos_input = lambda mneg, points=101: [(m, None, m, mneg, None, mneg) for m in sweep_1d(points)]
sweep_2n1p_input = lambda points=101: [(m, None, None, -m, None, -m) for m in sweep_1d(points)]
for name, meas in natsorted(buf_gates.items()):
# trace = eval_series(meas, sweep_buf_inputs(41), extract_57xx_tx, y_idx=1)
# trace = eval_series(meas, sweep_pos_input(0, 41), extract_57xx_tx, y_idx=1)
trace = eval_series(meas, sweep_2n1p_input(41), extract_57xx_tx, y_idx=1)
plot(f"{name}", "a1", trace)
plot_slope(f"slope {name}", "a1", trace)

View File

@@ -0,0 +1,43 @@
#!/usr/bin/env python3
from natsort import natsorted
from stacked_cores_52xx import *
from stacked_cores_52xx_plotters import *
def extract_58xx_tx(meas_rows: list) -> tuple:
"""
extracts a flat tuple of input/output M mappings from a 58xx run
"""
return (
meas_rows[0].m[0], # input (neg)
meas_rows[0].m[2], # I/O (pos)
meas_rows[0].m[3], # I/O (neg)
meas_rows[0].m[5], # input (pos)
meas_rows[1].m[2], # output (pos)
meas_rows[1].m[3], # output (neg)
0.5 * (meas_rows[1].m[2] - meas_rows[1].m[3]) # output (diff)
)
buf_gates = read_db(lambda name: name.startswith("58-"))
sweep_buf_inputs = lambda points=101: [(-m, m, -m, m, None, None, None) for m in sweep_1d(points)]
sweep_mpos = lambda mneg, points=101: [(mneg, m, mneg, m, None, None, None) for m in sweep_1d(points)]
for name, meas in natsorted(buf_gates.items()):
# sweep Mneg = -Mpos
# trace = eval_series(meas, sweep_buf_inputs(41), extract_58xx_tx, y_idx=6)
# plot(f"{name}", "Mpos", trace)
# plot_slope(f"slope {name}", "Mpos", trace)
# sweep M0 with M1 held constant (to check for some `max(M+, M-)`-like effect)
trace = eval_series(meas, sweep_mpos(-5000, 41), extract_58xx_tx, y_idx=6)
plot(f"{name}", "Mneg=-5000", trace)
plot_slope(f"slope {name}", "Mneg=-5000", trace)
trace = eval_series(meas, sweep_mpos(5000, 41), extract_58xx_tx, y_idx=6)
plot(f"{name}", "Mneg=5000", trace)
plot_slope(f"slope {name}", "Mneg=5000", trace)

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python3
from natsort import natsorted
from stacked_cores_52xx import *
from stacked_cores_52xx_plotters import *
def extract_60xx_tx(meas_rows: list) -> tuple:
"""
extracts a flat tuple of input/output M mappings from a 60xx run
"""
return (
meas_rows[0].m[0], # input
meas_rows[1].m[2], # output
# meas_rows[1].m[1], # intermediate
# meas_rows[1].m[3], # intermediate
)
buf_gates = read_db(lambda name: name.startswith("60-"))
sweep_inv_input = lambda points=101: [(m, None) for m in sweep_1d(points)]
for name, meas in natsorted(buf_gates.items()):
trace = eval_series(meas, sweep_inv_input(41), extract_60xx_tx, y_idx=1)
plot(f"{name}", "M", trace)
plot_slope(f"slope {name}", "M", trace)

View File

@@ -0,0 +1,307 @@
#!/usr/bin/env python3
from fake_cores_db import *
from stacked_cores_40xx_db import *
_3e10 = "30000001024e0"
_5e10 = "49999998976e0"
class SimParamsCascaded(SimParams):
def __init__(self, p1: SimParams, p2: SimParams):
super().__init__(p1.couplings, p1.wrappings_spec, p1.um, p1.drive_str)
self.p1 = p1
self.p2 = p2
@property
def is_inverter(self) -> bool:
return self.p1.is_inverter ^ self.p2.is_inverter
@property
def human_name(self) -> str:
return f"Cascade: {self.p1.human_name} -> {self.p2.human_name}"
from_params = lambda l: [
(p, get_meas(p)) for p in l if get_meas(p)
]
# plot pre-40xx sims
for (name, curve) in [
# ("fake step", fwd_fake_step.logically_inverted()),
# ("fake 1.5x", fwd_fake_1_5x.logically_inverted()),
# ("fake slope-change", fwd_fake_slope_change_before_0_5.logically_inverted()),
# ("fake slope-change (delayed)", fwd_fake_slope_change_after_0_5.logically_inverted()),
# ("fake slope-change (delayed, shifted)", fwd_fake_slope_change_after_0_5.shifted_x(-0.1).logically_inverted()),
# ("fake slope-change (delayed, shifted, inv-xy)", fwd_fake_slope_change_after_0_5.shifted_x(-0.1).shifted_y(-0.2).logically_inverted_x()),
# ("fake slope-change (delayed, flipped)", fwd_fake_slope_change_after_0_5.logically_inverted_x().logically_inverted()),
# ("fake hill", fwd_fake_hill.logically_inverted()),
# ("fake asymmetric hill", fwd_fake_asymmetric_hill.logically_inverted()),
# ("fake asymmetric flats", fwd_fake_asymmetric_flats.logically_inverted()),
# ("fake asymmetric overdrive", fwd_fake_asymmetric_overdrive.logically_inverted()),
# ("fake asymmetric bottom out", fwd_fake_asymmetric_bottom_out.logically_inverted()),
# ("18", fwd_18.logically_inverted()),
# ("24 5:1 (2e10 I)", fwd_24_5_1_2e10.logically_inverted()),
# ("24 5:1 (5e10 I)", fwd_24_5_1_5e10.logically_inverted()),
# ("24 5:1 (8e10 I)", fwd_24_5_1_8e10.logically_inverted()),
# ("26", fwd_26.logically_inverted()),
# ("38 1:0 (2e10 I)", fwd_38_1_0.logically_inverted()),
# ("38 1:0 (5e10 I)", fwd_38_1_0_5e10.logically_inverted()),
# ("38 2:0 (2e10 I)", fwd_38_2_0.logically_inverted()),
# ("38 2:0 (5e10 I)", fwd_38_2_0_5e10.logically_inverted()),
# ("38 3:0 (2e10 I)", fwd_38_3_0.logically_inverted()),
# ("38 3:0 (5e10 I)", fwd_38_3_0_5e10.logically_inverted()),
# ("38 4:0 (2e10 I)", fwd_38_4_0.logically_inverted()),
# ("38 4:0 (5e10 I)", fwd_38_4_0_5e10.logically_inverted()),
# ("39 2:0 (2e10 I)", inv_39_2_0_2e10),
# ("39 2:0 (5e10 I)", inv_39_2_0_5e10),
# ("39 2:0 (8e10 I)", inv_39_2_0_8e10),
# ("39 2:0 (1e11 I)", inv_39_2_0_1e11),
# ("39 2:0 (15e10 I)", inv_39_2_0_15e10),
]:
curve.plot(title = f"{name} mapping")
curve.logically_inverted().plot_slope(title = f"{name} slope")
curve.plot_equilibrium(title = f"{name} equilibrium")
# curve.plot_integral(title = f"{name} integrated")
of_interest = []
# plot select stims:
# of_interest += filter_meas(rad_um=800, drive=5e10, couplings=12, wrappings=5)
# of_interest += filter_meas(rad_um=800, drive=5e10, couplings=8, wrappings=11)
# of_interest += filter_meas(rad_um=800, drive=5e10, couplings=12, wrappings=7)
# of_interest += filter_meas(rad_um=800, drive=5e10, couplings=12, wrappings=5)
# of_interest += filter_meas(rad_um=800, drive=5e10, couplings=10, wrappings=9)
# of_interest += filter_meas(rad_um=800, drive=1e11, couplings=18, wrappings=5)
# of_interest += filter_meas(rad_um=800, drive=1e11, couplings=12, wrappings=7)
# of_interest += filter_meas(rad_um=800, drive=1e11, couplings=24, wrappings=3)
# of_interest += [(p, c.shifted_y(-0.13)) for (p, c) in filter_meas(rad_um=800, drive=1e11, couplings=12, wrappings=7)]
# of_interest += filter_meas(run="40", rad_um=800, drive=1e11, couplings=8, wrappings=11)
# of_interest += filter_meas(run="40", rad_um=800, drive=1e11, couplings=10, wrappings=9)
# of_interest += filter_meas(run="40", rad_um=1200, drive=1e11, couplings=12, wrappings=9)
# of_interest += filter_meas(run="40", rad_um=1200, drive=1e11, couplings=10, wrappings=11)
# of_interest += filter_meas(run="40", rad_um=1200, drive=2e11, couplings=12, wrappings=9)
# of_interest += filter_meas(run="41", viable_inverter=True)
# of_interest = [
# (p, c) for (p, c) in of_interest if p not in [
# SimParams41(9, 3, 600, "3e9"),
# SimParams41(9, 1, 400, "3e9"),
# SimParams41(10, 3, 800, "25e8"),
# SimParams41(9, 1, 400, "2e9"),
# SimParams41(10, 3, 800, "2e9"),
# SimParams41(10, 3, 800, "3e9"),
# SimParams41(10, 3, 800, "1e9"),
# SimParams41(10, 3, 800, "5e9"),
# SimParams41(10, 3, 800, "5e10"),
# SimParams41(9, 3, 600, "1e10"),
# SimParams41(9, 1, 400, "2e10"),
# ]
# ]
of_interest += from_params(
[
# SimParams41(4, 3, 400, "2e10"),
# SimParams41(4, 3, 400, "4e10"),
# SimParams41(16, 2, 800, "2e10"),
# SimParams41(18, 1, 600, "3e9"),
# SimParams41(18, 1, 600, "5e9"),
# SimParams41(18, 1, 600, "1e10"),
# SimParams41(18, 1, 600, "2e10"),
# SimParams41(4, 3, 400, "1e10"),
# SimParams41(9, 1, 400, "1e10"),
# SimParams41(12, 2, 600, "1e10"),
# SimParams41(12, 2, 600, "5e9"),
# SimParams41(10, 3, 800, "2e10"),
# SimParams41(6, 2, 400, "1e10"),
# SimParams41(9, 3, 600, "2e10"),
# SimParams41(10, 3, 800, "1e10"),
# SimParams41(24, 1, 800, "3e9"),
# SimParams41(24, 1, 800, "5e9"),
# SimParams41(16, 2, 800, "1e10"),
# SimParams41(24, 2, 1200, "5e9"),
# SimParams41(24, 2, 1200, "1e10"),
# SimParams41(36, 1, 1200, "5e9"),
# SimParams41(36, 1, 1200, "4e9"),
# SimParams41(36, 1, 1200, "3e9"),
# # SimParams41(9, 1, 400, "5e9"),
# SimParams41(18, 0, 400, "1e10"),
# SimParams41(18, 0, 400, "5e9"),
# SimParams41(9, 1, 400, "5e9"),
# SimParams41(9, 1, 400, "1e10"),
# SimParams41(9, 1, 400, "2e10"),
]
)
all_viable_inverters = filter_meas(viable_inverter=True)
# inverters with steepest starting slope
inverters_with_steepest_slope0 = from_params(
[
SimParams48(5e2, 4e4, 4000, 200, 9, 1, 400, "1e10"),
# SimParams48(5e2, 2e4, 2000, 100, 9, 1, 400, "1e10"),
SimParams48(5e2, 1e4, 1000, 50, 9, 1, 400, "1e10"),
SimParams48(1e3, 1e4, 2000, 100, 9, 1, 400, "1e10"),
SimParams41(36, 1, 1200, "5e9"),
SimParams41(24, 2, 1200, "1e10"),
SimParams41(16, 2, 800, "1e10"),
SimParams41(12, 2, 600, "1e10"),
]
)
_47xx_all = filter_meas(run="47")
_48xx_all = filter_meas(run="48")
_48xx_study = from_params(
[
# T0: y0=0.62, slope0=1.4 until x=0.20
SimParams48(5e2, 1e4, 1000, 50, 9, 1, 400, "1e10"),
# y0=0.67, slope0=1.0x until x=0.25
# SimParams48(5e2, 1e4, 1000, 50, 9, 1, 400, "2e10"),
# y0=0.55, slope0=1.2 until x=0.30
SimParams48(5e2, 5e3, 1000, 50, 9, 1, 400, "1e10"),
# # y0=0.60, slope0=0.96
# SimParams48(5e2, 5e3, 1000, 50, 9, 1, 400, "2e10"),
# y0=0.63, slope0=1.0 until x=0.15
SimParams48(1e3, 1e4, 2000, 100, 9, 1, 400, "2e10"),
# y0=0.57, slope0=1.25 until x=0.30
SimParams48(1e3, 1e4, 2000, 100, 9, 1, 400, "1e10"),
# y0=0.47, slope0=1.1 until x=0.30
SimParams48(1e3, 1e4, 2000, 100, 9, 1, 400, "5e9"),
# y0=0.52, slope0=1.3 until x=0.20
SimParams48(5e2, 1e4, 1000, 50, 9, 1, 400, "5e9"),
# y0=0.67, slope0=1.6 until x=0.20
SimParams48(5e2, 2e4, 2000, 100, 9, 1, 400, "1e10"),
# y0=0.57, slope0=1.3 until x=0.20
SimParams48(5e2, 2e4, 2000, 100, 9, 1, 400, "5e9"),
# y0=0.70, slope0=1.7 until x=0.15
SimParams48(5e2, 4e4, 4000, 200, 9, 1, 400, "1e10"),
# y0=0.59, slope0=1.4 until x=0.20
SimParams48(5e2, 4e4, 2000, 100, 9, 1, 400, "5e9"),
# y0=0.64, slope0=1.4 until x=0.20
SimParams48(2e2, 1e4, 1000, 50, 9, 1, 400, "1e10"),
# y0=0.71, slope0=1.6 until x=0.15
SimParams48(5e2, 1e5, 10000, 500, 9, 1, 400, "1e10"),
# y0=0.62, slope0=1.3 until x=0.20
SimParams48(5e2, 1e5, 10000, 500, 9, 1, 400, "5e9"),
# y0=0.90, slope0=1.3 to x=0.04
SimParams48(5e2, 2e4, 2000, 100, 6, 2, 400, "2e10"),
]
)
_49xx_study = from_params(
[
# slope ranges are measured from x=0 to x=0.2
# y(0)=0.90, y(1)=0.99, slope0>0.28
SimParams50(5e2, 4e4, 4000, 200, 5, 1, 400, "2e10"),
# y(0)=0.79, y(1)=0.98, slope0=0.36 to 0.27
SimParams50(5e3, 4e4, 4000, 200, 5, 1, 400, "2e10"),
# y(0)=0.65, y(1)=0.95, slope0=0.44 to 0.31
SimParams50(5e3, 4e4, 4000, 200, 5, 1, 400, "1e10"),
# y(0)=0.09, y(1)=0.17, slope0=0.07. "best" comparable 47-xx sim
# SimParams47(5, 1, 400, "1e10"),
# y(0)=0.81, y(1)=0.99, slope0=0.50 to 0.30
SimParams50(1e3, 2e4, 2000, 100, 5, 1, 400, "2e10"),
# y(0)=0.60, y(1)=0.89, slope0=0.40 to 0.29
SimParams50(2e3, 2e4, 2000, 100, 5, 1, 400, "1e10"),
# y(0)=0.73, y(1)=0.97, slope0=0.44 to 0.32
SimParams50(2e3, 2e4, 2000, 100, 5, 1, 400, "2e10"),
# y(0)=0.62, y(1)=0.80, slope0=0.23 to 0.22
SimParams50(5e2, 1e4, 1000, 50, 5, 1, 400, "1e10"),
# y(0)=0.65, y(1)=0.80, slope0>0.18
SimParams50(5e2, 2e4, 2000, 100, 5, 1, 400, "1e10"),
# y(0)=0.63, y(1)=0.89, slope0>0.27
SimParams50(1e3, 2e4, 2000, 100, 5, 1, 400, "1e10"),
# y(0)=0.46, y(1)=0.66, slope0>0.23
SimParams50(2e3, 2e4, 2000, 100, 5, 1, 400, "5e9"),
# y(0)=0.50, y(1)=0.74, slope0>0.25
SimParams50(5e3, 2e4, 2000, 100, 5, 1, 400, "1e10"),
# y(0)=0.61, y(1)=0.86, slope0>0.25
SimParams50(5e3, 2e4, 2000, 100, 5, 1, 400, "2e10"),
# y(0)=0.39, y(1)=0.48, slope0>0.11
# SimParams50(1e3, 2e4, 2000, 100, 5, 1, 400, "5e9"),
# y(0)=0.40, y(1)=0.56, slope0>0.15
# SimParams50(1e4, 2e4, 2000, 100, 5, 1, 400, "1e10"),
# y(0)=0.51, y(1)=0.68, slope0>0.16
# SimParams50(1e4, 2e4, 2000, 100, 5, 1, 400, "2e10"),
# y(0)=0.30, y(1)=0.40, slope0>0.09
# SimParams50(2e4, 2e4, 2000, 100, 5, 1, 400, "1e10"),
]
)
_51xx_study = from_params(
[
SimParams51(5e2, 2e4, 2000, 100, 5, 1, 400, _5e10),
SimParams51(1e3, 2e4, 2000, 100, 5, 1, 400, "2e10"),
SimParams51(2e3, 2e4, 2000, 100, 5, 1, 400, "1e10"),
SimParams51(2e3, 2e4, 2000, 100, 5, 1, 400, "2e10"),
SimParams51(2e3, 2e4, 2000, 100, 5, 1, 400, _3e10),
SimParams51(5e3, 2e4, 2000, 100, 5, 1, 400, _3e10),
SimParams51(5e3, 2e4, 2000, 100, 5, 1, 400, _5e10),
SimParams51(2e3, 2e4, 2000, 100, 3, 2, 400, "2e10"),
SimParams51(2e3, 2e4, 2000, 100, 3, 2, 400, _5e10),
SimParams51(2e3, 2e4, 2000, 100, 2, 3, 400, _5e10),
SimParams51(2e3, 2e4, 2000, 100, 2, 3, 400, 1e11),
]
)
# of_interest += filter_meas(run="40")
# of_interest += filter_meas(run="42", wrappings=7)
# of_interest += filter_meas(rad_um=400, run="41")
# of_interest += filter_meas(run="40", rad_um=800, couplings=18, wrappings=5)
# of_interest += filter_meas(run="41", viable_inverter=True)
# of_interest += filter_meas(run="42", rad_um=400, couplings=4)
# of_interest += filter_meas(run="42", rad_um=400, couplings=9)
# of_interest += filter_meas(run="42", rad_um=400, couplings=2)
# of_interest += filter_meas(run="42", rad_um=400, couplings=6)
# of_interest += filter_meas(run="41")
# of_interest += filter_meas(run="48")
# of_interest += filter_meas(run="48", coupling_cond=1e4, drive=1e10)
# of_interest += filter_meas(run="48", coupling_cond=1e4, drive=5e9)
# of_interest += inverters_with_steepest_slope0
# of_interest += _47xx_all
# of_interest += _48xx_study
# of_interest += _49xx_study
of_interest += _51xx_study
# plot cascaded inverter -> buffer
# for (inv_p, inv_curve) in filter_meas(is_inverter=True):
# for (fwd_p, fwd_curve) in filter_meas(rad_um=400, is_inverter=False):
# of_interest += [ (SimParamsCascaded(inv_p, fwd_p), inv_curve.cascaded(fwd_curve)) ]
# plot cascaded buffer -> inverter
# for (fwd_p, fwd_curve) in filter_meas(run="41", is_inverter=False):
# for (inv_p, inv_curve) in filter_meas(is_inverter=True):
# of_interest += [ (SimParamsCascaded(fwd_p, inv_p), fwd_curve.cascaded(inv_curve)) ]
# of_interest += filter_meas(is_inverter=False)
# of_interest += filter_meas(is_inverter=True)
# of_interest.sort(key = lambda i: -i[1].max_abs_slope())
# of_interest.sort(key = lambda i: -i[1].get_range()) # output range
# of_interest.sort(key = lambda i: i[1].get(0.5) - i[1].get(1.0)) # delayed output swing
# of_interest.sort(key = lambda i: i[1].get(0.5) - i[1].get(0.0)) # early output swing
# of_interest.sort(key = lambda i: i[1].get_repeated(1.0) - i[1].get_repeated(0.0)) # inverter strength
for (params, curve) in of_interest:
curve = curve.flat_extrapolation()
fwd = curve.logically_inverted() if params.is_inverter else curve
fwd.plot(title = f"{params.human_name} mapping")
fwd.plot_slope(title = f"{params.human_name} slope")
inv = fwd.logically_inverted()
# if params.is_inverter or True:
# inv.plot_equilibrium(title = f"{params.human_name} equilibrium")
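The `is_inverter` XOR in `SimParamsCascaded` captures that cascading two inverting stages yields a non-inverting one, while a single inversion survives. A minimal standalone check with a stub params class (the real `SimParams` lives in the 40xx db module):

```python
class FakeParams:
    """Hypothetical stand-in for a SimParams entry."""
    def __init__(self, name: str, inverter: bool):
        self.human_name = name
        self.is_inverter = inverter

class Cascaded:
    def __init__(self, p1, p2):
        self.p1, self.p2 = p1, p2
    @property
    def is_inverter(self) -> bool:
        # two inversions cancel; one inversion survives
        return self.p1.is_inverter ^ self.p2.is_inverter
    @property
    def human_name(self) -> str:
        return f"Cascade: {self.p1.human_name} -> {self.p2.human_name}"

inv = FakeParams("inv", True)
buf = FakeParams("buf", False)
print(Cascaded(inv, inv).is_inverter)  # False
print(Cascaded(inv, buf).is_inverter)  # True
print(Cascaded(inv, buf).human_name)   # Cascade: inv -> buf
```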

View File

@@ -0,0 +1,46 @@
def load_csv(path: str):
"""
returns (header: list[str], rows: list[list[T]])
"""
header = []
rows = []
for i, line in enumerate(open(path).read().strip().split('\n')):
if i == 0:
header = line.split(',')
else:
rows.append(eval(line))  # each data line is a run of comma-separated numeric literals; eval parses it into a tuple
return header, rows
def labeled_rows(header: list, rows: list):
"""
return a list of dicts,
transforming each row into a kv map
"""
new_rows = []
for row in rows:
new_rows.append({ header[i]: elem for i, elem in enumerate(row) })
return new_rows
def last_row_before_t(rows: list, t: float):
"""
return the last row for which row[time] < t
"""
prev_row = None
for row in rows:
if row["time"] >= t:
break
prev_row = row
return prev_row
def extract_m(row: dict) -> list:
"""
return [M(state0), M(state1), ...]
"""
m = []
for k, v in row.items():
if k.startswith('M(state') and k.endswith(')'):
n = int(k[len('M(state'):-1])
assert n == len(m)
m.append(v)
return m
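Assuming a meas.csv whose first line is a header and whose data lines are comma-separated numeric literals (which is what the `eval` above implies), the four helpers compose like this. They are duplicated inline so the sketch runs standalone:

```python
import os
import tempfile

# The four helpers above, duplicated inline for a self-contained demo.
def load_csv(path):
    header, rows = [], []
    for i, line in enumerate(open(path).read().strip().split('\n')):
        if i == 0:
            header = line.split(',')
        else:
            rows.append(eval(line))  # data lines are comma-separated numeric literals
    return header, rows

def labeled_rows(header, rows):
    return [{header[i]: elem for i, elem in enumerate(row)} for row in rows]

def last_row_before_t(rows, t):
    prev_row = None
    for row in rows:
        if row["time"] >= t:
            break
        prev_row = row
    return prev_row

def extract_m(row):
    m = []
    for k, v in row.items():
        if k.startswith('M(state') and k.endswith(')'):
            n = int(k[len('M(state'):-1])
            assert n == len(m)
            m.append(v)
    return m

# A toy meas.csv: a time column plus two M(stateN) columns.
csv_text = "time,M(state0),M(state1)\n1e-9,100,-50\n2e-9,150,-75\n3e-9,200,-100\n"
with tempfile.NamedTemporaryFile('w', suffix='.csv', delete=False) as f:
    f.write(csv_text)
    path = f.name

header, raw = load_csv(path)
rows = labeled_rows(header, raw)
row = last_row_before_t(rows, 2.5e-9)  # last sample strictly before t = 2.5 ns
print(extract_m(row))  # [150, -75]
os.unlink(path)
```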

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 12-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_12xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_start = last_row_before_t(rows, 2e-9)
tx_end = last_row_before_t(rows, 3e-9)
noop_start = last_row_before_t(rows, 5e-9)
noop_end = last_row_before_t(rows, 6e-9)
m_tx_start = extract_m(tx_start)
m_tx_end = extract_m(tx_end)
m_noop_start = extract_m(noop_start)
m_noop_end = extract_m(noop_end)
m1_tx = abs(m_tx_end[1] - m_tx_start[1])
m1_noop = abs(m_noop_end[1] - m_noop_start[1])
m_tx_arr = [round(abs(m_tx_end[i] - m_tx_start[i])) for i in [0, 2]]
m_tx = sum(m_tx_arr)
m_noop_arr = [round(abs(m_noop_end[i] - m_noop_start[i])) for i in [0, 2]]
m_noop = sum(m_noop_arr)
ratio_tx_noop = m_tx / m_noop
ratio_tx_m1 = m_tx / m1_tx
ratio_tx_noop_m1 = (m_tx - m_noop) / m1_tx
print(f'm1 tx: {m1_tx} ({m_tx_start[1]} -> {m_tx_end[1]})')
print(f'm1 noop: {m1_noop} ({m_noop_start[1]} -> {m_noop_end[1]})')
print('')
print(f'm(tx): {m_tx_start}')
print(f' -> {m_tx_end}')
print('')
print(f'm(noop): {m_noop_start}')
print(f' -> {m_noop_end}')
print('')
print(f'tx/noop: {ratio_tx_noop:.3}')
print(f'tx/m1: {ratio_tx_m1:.3}')
print(f'(tx-noop)/m1: {ratio_tx_noop_m1:.3}')
if __name__ == '__main__':
extract_12xx(sys.argv[1])

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 17-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_17xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_start_0 = last_row_before_t(rows, 2e-9)
tx_start_1 = last_row_before_t(rows, 4e-9)
tx_end = last_row_before_t(rows, 5e-9)
m_tx_start_0 = extract_m(tx_start_0)
m_tx_start_1 = extract_m(tx_start_1)
m_tx_end = extract_m(tx_end)
m0_switch = abs(m_tx_start_1[0] - m_tx_start_0[0])
m_middle_switch = sum(abs(e - s) for (e, s) in zip(m_tx_start_1[1:-1], m_tx_start_0[1:-1]))
m_middle_clear = sum(abs(e - s) for (e, s) in zip(m_tx_end[1:-1], m_tx_start_1[1:-1]))
m_last_switch = abs(m_tx_end[-1] - m_tx_start_1[-1])
print(f'm0: {m0_switch} ({m_tx_start_0[0]} -> {m_tx_start_1[0]})')
print(f'm_middle: {m_middle_switch}')
print(f'm_middle_clear: {m_middle_clear}')
print(f'm_last: {m_last_switch} ({m_tx_start_1[-1]} -> {m_tx_end[-1]})')
print('')
print(f'm: {m_tx_start_0}')
print(f' -> {m_tx_start_1}')
print(f' -> {m_tx_end}')
if __name__ == '__main__':
extract_17xx(sys.argv[1])

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 18-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_18xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, 2e-9)
tx_fwd = last_row_before_t(rows, 4e-9)
tx_rev = last_row_before_t(rows, 6e-9)
m_init = extract_m(tx_init)
m_fwd = extract_m(tx_fwd)
m_rev = extract_m(tx_rev)
m0_fwd = abs(m_init[0] - m_fwd[0])
m0_rev = abs(m_rev[0] - m_fwd[0])
m_rest_fwd = sum(e - s for (e, s) in zip(m_fwd[1:], m_init[1:]))
m_rest_rev = sum(s - e for (e, s) in zip(m_rev[1:], m_fwd[1:]))
print(f'\t- m0: {m_init[0]} -> {m_fwd[0]} -> {m_rev[0]}')
print(f'\t- m0 fwd: {m0_fwd}')
print(f'\t- m0 rev: {m0_rev}')
print(f'\t- m_middle fwd: {m_rest_fwd}')
print(f'\t- m_middle rev: {m_rest_rev}')
print(f'\t- m: {m_init}')
print(f'\t -> {m_fwd}')
print(f'\t -> {m_rev}')
if __name__ == '__main__':
extract_18xx(sys.argv[1])

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 24-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_24xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, 2e-9)
tx_fini = last_row_before_t(rows, 3e-9)
m_init = extract_m(tx_init)
m_fini = extract_m(tx_fini)
m0 = -(m_fini[0] - m_init[0])
m1 = m_fini[1] - m_init[1]
print(f'\t- m0: {m0} ({m_init[0]} -> {m_fini[0]})')
print(f'\t- m1: {m1} ({m_init[1]} -> {m_fini[1]})')
print(f'\t- amp: {m1/m0}')
if __name__ == '__main__':
extract_24xx(sys.argv[1])
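The sign flips above encode drive direction: the input core is driven toward negative M, so its swing is negated to get a positive magnitude, and `amp` is the stage's output/input swing ratio. A worked example with hypothetical magnetization values:

```python
# Hypothetical start/end magnetization vectors for a 24-xx style run.
m_init = [ 15000.0, -9000.0]
m_fini = [-12000.0,  6000.0]

m0 = -(m_fini[0] - m_init[0])  # input core swing, negated since it is driven negative
m1 = m_fini[1] - m_init[1]     # output core swing
amp = m1 / m0                  # per-stage "gain"
print(f'm0={m0} m1={m1} amp={amp:.3f}')  # m0=27000.0 m1=15000.0 amp=0.556
```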

View File

@@ -0,0 +1,29 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 28-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_28xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, 2e-9)
tx_fini = last_row_before_t(rows, 3e-9)
m_init = extract_m(tx_init)
m_fini = extract_m(tx_fini)
m0 = -(m_fini[0] - m_init[0])
m1 = m_fini[1] - m_init[1]
m2 = -(m_fini[2] - m_init[2])
print(f'\t- m0: {m0} ({m_init[0]} -> {m_fini[0]})')
print(f'\t- m1: {m1} ({m_init[1]} -> {m_fini[1]})')
print(f'\t- m2: {m2} ({m_init[2]} -> {m_fini[2]})')
print(f'\t- amp: {m2/m0}')
if __name__ == '__main__':
extract_28xx(sys.argv[1])

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 29-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_29xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, 2e-9)
tx_mid = last_row_before_t(rows, 4e-9)
tx_fini = last_row_before_t(rows, 5e-9)
m_init = extract_m(tx_init)
m_mid = extract_m(tx_mid)
m_fini = extract_m(tx_fini)
m0 = -(m_mid[0] - m_init[0])
m1_first = m_mid[1] - m_init[1]
m2_first = m_mid[2] - m_init[2]
m1_second = m_fini[1] - m_mid[1]
m2_second = -(m_fini[2] - m_mid[2])
m1 = m1_first + m1_second
print(f'\t- m0: {m0} ({m_init[0]} -> {m_mid[0]})')
print(f'\t- m1: {m1}')
print(f'\t\t- from m0: {m1_first}')
print(f'\t\t- from m2: {m1_second}')
print(f'\t {m_init[1]} -> {m_mid[1]} -> {m_fini[1]}')
print(f'\t- m2: {m2_second}')
print(f'\t {m_init[2]} -> {m_mid[2]} -> {m_fini[2]}')
print(f'\t- amp: {m1/m0}')
if __name__ == '__main__':
extract_29xx(sys.argv[1])

View File

@@ -0,0 +1,32 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 33-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_33xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, 2e-9)
tx_fini = last_row_before_t(rows, 3e-9)
m_init = extract_m(tx_init)
m_fini = extract_m(tx_fini)
m0 = -(m_fini[0] - m_init[0])
m1 = m_fini[1] - m_init[1]
m2 = -(m_fini[2] - m_init[2])
m3 = m_fini[3] - m_init[3]
print(f'\t- madj: {m0 + m2 - m3}')
print(f'\t\t- m0: {m_init[0]} -> {m_fini[0]}')
print(f'\t\t- m2: {m_init[2]} -> {m_fini[2]}')
print(f'\t\t- m3: {m_init[3]} -> {m_fini[3]}')
print(f'\t- m1: {m1}')
print(f'\t\t- {m_init[1]} -> {m_fini[1]}')
if __name__ == '__main__':
extract_33xx(sys.argv[1])

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 34-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_34xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, 2e-9)
tx_fini = last_row_before_t(rows, 3e-9)
m_init = extract_m(tx_init)
m_fini = extract_m(tx_fini)
m0 = -(m_fini[0] - m_init[0])
m1 = m_fini[1] - m_init[1]
m2 = -(m_fini[2] - m_init[2])
print(f'\t- madj: {m0 - m2}')
print(f'\t\t- m0: {m_init[0]} -> {m_fini[0]}')
print(f'\t\t- m2: {m_init[2]} -> {m_fini[2]}')
print(f'\t- m1: {m1}')
print(f'\t\t- {m_init[1]} -> {m_fini[1]}')
if __name__ == '__main__':
extract_34xx(sys.argv[1])

View File

@@ -0,0 +1,34 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 36-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_36xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, 2e-9)
tx_fini = last_row_before_t(rows, 3e-9)
m_init = extract_m(tx_init)
m_fini = extract_m(tx_fini)
m0 = -(m_fini[0] - m_init[0])
m1 = m_fini[1] - m_init[1]
m2 = -(m_fini[2] - m_init[2])
m3 = -(m_fini[3] - m_init[3])
m4 = m_fini[4] - m_init[4]
print(f'\t- madj: {m0 + m2 + m3 - m4}')
print(f'\t\t- m0: {m_init[0]} -> {m_fini[0]}')
print(f'\t\t- m2: {m_init[2]} -> {m_fini[2]}')
print(f'\t\t- m3: {m_init[3]} -> {m_fini[3]}')
print(f'\t\t- m4: {m_init[4]} -> {m_fini[4]}')
print(f'\t- m1: {m1}')
print(f'\t\t- {m_init[1]} -> {m_fini[1]}')
if __name__ == '__main__':
extract_36xx(sys.argv[1])

View File

@@ -0,0 +1,40 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 37-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_37xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, 2e-9)
tx_fini = last_row_before_t(rows, 3e-9)
m_init = extract_m(tx_init)
m_fini = extract_m(tx_fini)
m0 = -(m_fini[0] - m_init[0])
m1 = -(m_fini[1] - m_init[1])
m2 = -(m_fini[2] - m_init[2])
m3 = m_fini[3] - m_init[3]
m4 = m_fini[4] - m_init[4]
m5 = m_fini[5] - m_init[5]
m6 = m_fini[6] - m_init[6]
print(f'\t- madj: {m0 + m1 + m2 - m4 - m5 - m6}')
print(f'\t\t- m0: {m_init[0]} -> {m_fini[0]}')
print(f'\t\t- m1: {m_init[1]} -> {m_fini[1]}')
print(f'\t\t- m2: {m_init[2]} -> {m_fini[2]}')
print(f'\t\t- m4: {m_init[4]} -> {m_fini[4]}')
print(f'\t\t- m5: {m_init[5]} -> {m_fini[5]}')
print(f'\t\t- m6: {m_init[6]} -> {m_fini[6]}')
print(f'\t- m3: {m3}')
print(f'\t\t- {m_init[3]} -> {m_fini[3]}')
if __name__ == '__main__':
extract_37xx(sys.argv[1])

View File

@@ -0,0 +1,29 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 38-xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_38xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, 2e-9)
tx_fini = last_row_before_t(rows, 3e-9)
m_init = extract_m(tx_init)
m_fini = extract_m(tx_fini)
m0 = -(m_fini[0] - m_init[0])
madj = sum(init - fini for (init, fini) in zip(m_init[1:], m_fini[1:]))
print(f'\t- madj: {madj}')
for i, (init, fini) in enumerate(zip(m_init[1:], m_fini[1:])):
print(f'\t\t- m{i+1}: {init} -> {fini}')
print(f'\t- m0: {m0}')
print(f'\t\t- {m_init[0]} -> {m_fini[0]}')
if __name__ == '__main__':
extract_38xx(sys.argv[1])

View File

@@ -0,0 +1,76 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 39-xx demos
to extract higher-level info from them.
"""
import os
import sys
import re
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_one(path: str, t_first: float, t_last: float, t_mid: float = None):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_init = last_row_before_t(rows, t_first)
tx_fini = last_row_before_t(rows, t_last)
tx_mid = last_row_before_t(rows, t_mid) if t_mid is not None else None
if tx_fini and float(tx_fini["time"]) < 0.95 * t_last:
tx_fini = None
m_init = extract_m(tx_init) if tx_init is not None else [None]
m_fini = extract_m(tx_fini) if tx_fini is not None else [None]
m_mid = extract_m(tx_mid)[1:-1] if tx_mid is not None else []
return m_init[0], m_fini[-1], m_mid
def extract_polarity(stem: str) -> float:
s = None
if re.search(r"-p\d\d\d\d?", stem):
s = re.search(r"-p\d\d\d\d?", stem).group(0)
if re.search(r"-n\d\d\d\d?", stem):
s = re.search(r"-n\d\d\d\d?", stem).group(0)
if s:
sign = {'n': -1, 'p': 1}[s[1]]
mag = int(s[2:])
max_mag = 10**(len(s[2:]) - 1)
return sign * mag / max_mag
if "-000" in stem:
return 0.00
def extract_39xx(base_path: str, t_first: str = "2e-9", t_last: str = "3e-9", t_mid: str = None):
t_first = float(t_first)
t_last = float(t_last)
t_mid = float(t_mid) if t_mid is not None else None
base_dir, prefix = os.path.split(base_path)
mappings = {}
for entry in os.listdir(base_dir):
if entry.startswith(prefix):
(input_, output, mid) = extract_one(os.path.join(base_dir, entry, "meas.csv"), t_first, t_last, t_mid)
polarity = extract_polarity(entry)
if input_ is not None and output is not None:
mappings[int(round(input_))] = (int(round(output)), polarity, [round(m) for m in mid])
if mappings:
print("Piecewise(")
print(" [")
for i, (o, polarity, mid) in sorted(mappings.items()):
comments = []
if polarity is not None:
comments += [f"{polarity:= 05.3f}"]
for core, val in enumerate(mid):
comments += [f"M{core+1}={val:5}"]
comment = " # " + ", ".join(comments) if comments else ""
print(f" [ {i:6}, {o:6} ],{comment}")
print(" ]")
print(")")
if __name__ == '__main__':
extract_39xx(*sys.argv[1:])
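`extract_polarity` decodes a signed fraction from the run-directory name: `p`/`n` gives the sign, and the digits are divided by a power of ten one below their count, so three digits are hundredths-based and four are thousandths-based. Duplicated inline (with the regexes as raw strings) for a standalone check:

```python
import re

# Copied from extract_39xx above, regexes written as raw strings.
def extract_polarity(stem: str):
    s = None
    if re.search(r"-p\d\d\d\d?", stem):
        s = re.search(r"-p\d\d\d\d?", stem).group(0)
    if re.search(r"-n\d\d\d\d?", stem):
        s = re.search(r"-n\d\d\d\d?", stem).group(0)
    if s:
        sign = {'n': -1, 'p': 1}[s[1]]
        mag = int(s[2:])                # e.g. "050" -> 50
        max_mag = 10**(len(s[2:]) - 1)  # 3 digits -> 100, 4 digits -> 1000
        return sign * mag / max_mag
    if "-000" in stem:
        return 0.00

print(extract_polarity("39-buf-p050"))   # 0.5
print(extract_polarity("39-buf-n025"))   # -0.25
print(extract_polarity("39-buf-p1000"))  # 1.0
print(extract_polarity("39-buf-000"))    # 0.0
```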

View File

@@ -0,0 +1,377 @@
from inverter_characteristics import Piecewise
fwd_17_4_0_8e10 = Piecewise(
[
[ -16381, 6688 ],
[ -15885, 6778 ],
[ -14831, 6878 ],
[ -13622, 7004 ],
[ -883, 8528 ],
[ 6252, 9496 ],
[ 7846, 9703 ],
[ 8148, 9766 ],
[ 8425, 9831 ],
[ 8705, 9892 ],
[ 8988, 9916 ],
[ 9866, 10114 ],
[ 11179, 10234 ],
[ 12033, 10382 ],
[ 12491, 10422 ],
[ 13135, 10494 ],
[ 14363, 10649 ],
]
).normalized(17000)
fwd_18 = Piecewise(
[
[ -16206, -1131 ],
[ -15192, -746 ],
[ -12827, 33 ],
[ -642, 4990 ],
[ 13082, 9652 ],
[ 16696, 10600 ],
]
).normalized(17000)
fwd_24_5_1_2e10 = Piecewise(
[
[ -12912, -8487 ],
[ -4754, -6045 ],
[ 2687, -2560 ],
[ 3936, -1774 ],
[ 4267, -1517 ],
[ 4504, -1314 ],
[ 4710, -1132 ],
[ 4820, -1075 ],
[ 4884, -1042 ],
[ 4948, -1012 ],
[ 5046, -968 ],
[ 5205, -897 ],
[ 5364, -829 ],
[ 5525, -760 ],
[ 5843, -622 ],
[ 6764, -197 ],
[ 9467, 788 ],
]
).normalized(15000)
fwd_24_5_1_5e10 = Piecewise(
[
[ -15208, -6303 ],
[ -13396, -5388 ],
[ -11992, -4516 ],
[ -11991, -4499 ],
[ -9379, -2953 ],
[ -4757, 531 ],
[ -2, 4734 ],
[ 3074, 7760 ],
[ 4854, 9784 ],
[ 5611, 10736 ],
[ 5994, 11126 ],
[ 6298, 11404 ],
[ 6678, 11757 ],
[ 7196, 12200 ],
[ 7667, 12589 ],
[ 8238, 13048 ],
[ 8239, 13046 ],
[ 9613, 14027 ],
[ 10585, 14622 ],
[ 12048, 15346 ],
]
).normalized(17000)
fwd_24_5_1_8e10 = Piecewise(
[
[ -16412, -3392 ],
[ -15266, -2681 ],
[ -14036, -1897 ],
[ -12789, -1110 ],
[ -8766, 1588 ],
[ -2052, 6544 ],
[ 2389, 9989 ],
[ 4225, 11437 ],
[ 5194, 12182 ],
[ 5971, 12438 ],
[ 6901, 12937 ],
[ 8308, 13632 ],
[ 9910, 14365 ],
[ 10583, 14662 ],
[ 11240, 14850 ],
[ 12114, 15171 ],
[ 13862, 15600 ],
]
).normalized(17000)
fwd_26 = Piecewise(
[
[ -14687, -7326 ],
[ -13049, -6503 ],
[ -11785, -5833 ],
[ -4649, -1447 ],
[ 4961, 7059 ],
[ 11283, 11147 ],
]
).normalized(17000)
fwd_38_1_0 = Piecewise(
[
[ -12817, -8131 ], # -1.00
[ -12239, -7798 ], # -0.80
[ -4859, -2587 ], # -0.50
[ 1490, 3012 ], # -0.30
[ 3866, 5327 ], # -0.20
[ 6030, 7237 ], # -0.10
[ 7747, 8357 ], # 0.00
[ 9494, 9202 ], # +0.10
[ 11261, 10011 ], # +0.20
[ 12941, 10808 ], # +0.30
[ 15415, 11986 ], # +0.50
[ 16196, 12375 ], # +0.80
# [ 16182, 12352 ], # +1.00
]
).normalized(17000)
fwd_38_1_0_5e10 = Piecewise(
[
[ -16180, -7079 ], # -1.00
[ -14443, -5965 ], # -0.50
[ -5579, -13 ], # -0.20
[ 10033, 7676 ], # 0.00
[ 14986, 9375 ], # +0.20
[ 15149, 9606 ], # +0.50
[ 15801, 9924 ], # +1.00
]
).normalized(17000)
fwd_38_2_0 = Piecewise(
[
[ (-13745 + -13012)/2, -6222 ], # -1.00
[ (-13097 + -12338)/2, -5662 ], # -0.80
[ (-4969 + -4744)/2, 2373 ], # -0.50
[ (535 + 611)/2, 8793 ], # -0.30
[ (1772 + 2070)/2, 10467 ], # -0.20
[ (3143 + 3200)/2, 11906 ], # -0.10
[ (4472 + 4114)/2, 12921 ], # 0.00
[ (5838 + 5144)/2, 13788 ], # +0.10
[ (7221 + 6291)/2, 14530 ], # +0.20
[ (8558 + 7644)/2, 15127 ], # +0.30
[ (11159 + 10397)/2, 15865 ], # +0.50
[ (12778 + 14243)/2, 16162 ], # +0.80
[ (12430 + 15653)/2, 16202 ], # +1.00
]
).normalized(17000)
fwd_38_2_0_5e10 = Piecewise(
[
[ (-16386 + -16170)/2, -3490 ], # -1.00
[ (-16107 + -15529)/2, -3035 ], # -0.80
[ (-15075 + -14122)/2, -1827 ], # -0.50
[ (-13387 + -12396)/2, -63 ], # -0.30
[ (-5358 + -5201)/2, 7423 ], # -0.20
[ (2355 + 1719)/2, 12039 ], # -0.10
[ (7563 + 5962)/2, 13479 ], # 0.00
[ (10617 + 9318)/2, 14282 ], # +0.10
[ (12779 + 12447)/2, 14796 ], # +0.20
[ (12649 + 15269)/2, 15034 ], # +0.30
[ (13077 + 16320)/2, 15140 ], # +0.50
[ (14410 + 16557)/2, 15260 ], # +0.80
[ (15281 + 16623)/2, 15331 ], # +1.00
]
).normalized(17000)
fwd_38_3_0 = Piecewise(
[
[ (-13956 + -13890 + -13077)/3, -5203 ], # -1.00
[ (-13292 + -13161 + -12374)/3, -4518 ], # -0.80
[ (-4979 + -4885 + -4717)/3, 5051 ], # -0.50
[ (381 + -153 + 31)/3, 11264 ], # -0.30
[ (1531 + 503 + 1006)/3, 12509 ], # -0.20
[ (2862 + 1120 + 1743)/3, 13549 ], # -0.10
[ (4180 + 1821 + 2239)/3, 14386 ], # 0.00
[ (5560 + 2564 + 2899)/3, 15033 ], # +0.10
[ (6986 + 3436 + 3701)/3, 15451 ], # +0.20
[ (8358 + 4396 + 4732)/3, 15738 ], # +0.30
[ (10482 + 6644 + 7735)/3, 16081 ], # +0.50
[ (11246 + 12478 + 12663)/3, 16343 ], # +0.80
[ (11436 + 13343 + 14411)/3, 16380 ], # +1.00
]
).normalized(17000)
fwd_38_3_0_5e10 = Piecewise(
[
[ (-16403 + -16389 + -16152)/3, -1175 ], # -1.00
[ (-16134 + -16084 + -15471)/3, -701 ], # -0.80
[ (-15192 + -14891 + -14016)/3, 777 ], # -0.50
[ (-13512 + -13089 + -12278)/3, 2939 ], # -0.30
[ (-5248 + -5187 + -5032)/3, 11125 ], # -0.20
[ (2099 + 645 + 708)/3, 14046 ], # -0.10
[ (7045 + 3536 + 3757)/3, 14557 ], # 0.00
[ (9729 + 6054 + 6543)/3, 14967 ], # +0.10
[ (11453 + 9238 + 10081)/3, 15393 ], # +0.20
[ (11572 + 13274 + 13839)/3, 15759 ], # +0.30
[ (12534 + 15192 + 16090)/3, 15925 ], # +0.50
[ (14013 + 16353 + 16508)/3, 16007 ], # +0.80
[ (14944 + 16565 + 16606)/3, 16033 ], # +1.00
]
).normalized(17000)
fwd_38_4_0 = Piecewise(
[
[ (-14020 + -14112 + -13935 + -13091)/4, -4701 ], # -1.00
[ (-13353 + -13363 + -13185 + -12381)/4, -3947 ], # -0.80
[ (-4982 + -4912 + -4870 + -4696)/4, 6398 ], # -0.50
[ (338 + -243 + -352 + -254)/4, 12205 ], # -0.30
[ (1469 + 303 + 107 + 510)/4, 13165 ], # -0.20
[ (2789 + 839 + 443 + 1089)/4, 13989 ], # -0.10
[ (4150 + 1560 + 653 + 1416)/4, 14727 ], # 0.00
[ (5562 + 2421 + 979 + 1899)/4, 15224 ], # +0.10
[ (7027 + 3336 + 1460 + 2518)/4, 15551 ], # +0.20
[ (8402 + 4357 + 2093 + 3293)/4, 15802 ], # +0.30
[ (10385 + 7210 + 5554 + 5998)/4, 16123 ], # +0.50
[ (11301 + 12558 + 13004 + 12508)/4, 16388 ], # +0.80
[ (11462 + 13429 + 13829 + 13370)/4, 16408 ], # +1.00
]
).normalized(17000)
fwd_38_4_0_5e10 = Piecewise(
[
[ (-16395 + -16418 + -16377 + -16134)/4, 346 ], # -1.00
[ (-16128 + -16140 + -16066 + -15431)/4, 843 ], # -0.80
[ (-15217 + -15075 + -14834 + -13958)/4, 2407 ], # -0.50
[ (-13556 + -13263 + -13009 + -12226)/4, 4772 ], # -0.30
[ (-5186 + -5134 + -5083 + -4922)/4, 12813 ], # -0.20
[ (1894 + 537 + -149 + 331)/4, 14556 ], # -0.10
[ (6845 + 3237 + 1699 + 2725)/4, 14936 ], # 0.00
[ (9591 + 5649 + 3599 + 5023)/4, 15250 ], # +0.10
[ (11255 + 8605 + 7601 + 8571)/4, 15647 ], # +0.20
[ (11268 + 12837 + 13307 + 13021)/4, 15944 ], # +0.30
[ (12388 + 14744 + 15209 + 15530)/4, 16104 ], # +0.50
[ (13871 + 16190 + 16345 + 16412)/4, 16178 ], # +0.80
[ (14774 + 16497 + 16563 + 16559)/4, 16197 ], # +1.00
]
).normalized(17000)
inv_39_2_0_2e10 = Piecewise(
[
[ -12902, 10759 ],
[ -12339, 11336 ],
[ -8581, 11274 ],
[ -4821, 10571 ],
[ -822, 9463 ],
[ 3117, 8265 ],
[ 4938, 7704 ],
[ 6441, 7221 ],
[ 7234, 6912 ],
[ 7844, 6662 ],
[ 8282, 6551 ],
[ 8674, 6443 ],
[ 9071, 6325 ],
[ 9479, 6191 ],
[ 10311, 5885 ],
[ 11153, 5541 ],
[ 12833, 4788 ],
[ 14097, 4071 ],
[ 14561, 3816 ],
]
).normalized(15000)
inv_39_2_0_5e10 = Piecewise(
[
[ -15691, 9609 ],
[ -15154, 9450 ],
[ -14498, 9327 ],
[ -14086, 9217 ],
[ -13501, 9113 ],
[ -12664, 8923 ],
[ -9677, 7937 ],
[ -4868, 5948 ],
[ 222, 3390 ],
[ 5223, 610 ],
[ 9175, -1732 ],
[ 11286, -2820 ],
[ 12505, -3439 ],
[ 13504, -3957 ],
[ 14679, -4588 ],
[ 15127, -4830 ],
[ 15667, -5110 ],
[ 16156, -5353 ],
[ 16350, -5450 ],
]
).normalized(17000)
inv_39_2_0_8e10 = Piecewise(
[
[ -16465, 6854 ],
[ -16318, 6905 ],
[ -16079, 6824 ],
[ -15789, 6623 ],
[ -15296, 6435 ],
[ -14593, 6212 ],
[ -14052, 5989 ],
[ -13259, 5563 ],
[ -8825, 3572 ],
[ -1149, -218 ],
[ 6004, -3851 ],
[ 10704, -6032 ],
[ 13131, -6986 ],
[ 14268, -7421 ],
[ 14894, -7683 ],
[ 15301, -7829 ],
[ 15839, -8028 ],
[ 16356, -8243 ],
[ 16507, -8292 ],
]
).normalized(17000)
inv_39_2_0_1e11 = Piecewise(
[
[ -16651, 5123 ],
[ -16567, 5111 ],
[ -16429, 5092 ],
[ -16312, 5120 ],
[ -16102, 5078 ],
[ -15572, 4837 ],
[ -15109, 4545 ],
[ -14393, 4298 ],
[ -13129, 3647 ],
[ -5324, -167 ],
[ 3762, -4392 ],
[ 10248, -7171 ],
[ 13522, -8341 ],
[ 14221, -8595 ],
[ 14851, -8807 ],
[ 15280, -8964 ],
[ 15864, -9130 ],
[ 16440, -9363 ],
[ 16585, -9409 ],
]
).normalized(17000)
inv_39_2_0_15e10 = Piecewise(
[
[ -16854, 1899 ],
[ -16811, 1926 ],
[ -16759, 1908 ],
[ -16723, 1910 ],
[ -16670, 1908 ],
[ -16569, 1907 ],
[ -16466, 1877 ],
[ -16269, 1775 ],
[ -15731, 1520 ],
[ -13797, 601 ],
[ -1756, -4314 ],
[ 9395, -7804 ],
[ 13461, -8670 ],
[ 14026, -8763 ],
[ 14766, -8878 ],
[ 15279, -9140 ],
[ 16084, -9300 ],
[ 16568, -9413 ],
[ 16672, -9386 ],
]
).normalized(17000)

File diff suppressed because it is too large


@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import os
from natsort import natsorted
from extract_meas import extract_parameterized_meas, indented
from stacked_cores_52xx_db import DB
## CONSTANTS/CONFIGURATION
# list of sims to extract details for
PREFIXES = { "52", "53", "54", "55", "56", "57", "58", "59", "60", "61" }
def times_of_interest(sim_name: str) -> list:
# could be more intelligent, extracting e.g. the clock duration from the name
if sim_name.startswith("52-"):
return [2e-9, 4e-9, 8e-9]
if sim_name.startswith("53-"):
return [2e-9, 4e-9]
if sim_name.startswith("54-"):
return [2e-9, 4e-9, 8e-9]
if sim_name.startswith("55-"):
return [4e-9, 6e-9, 10e-9]
if sim_name.startswith("56-"):
return [4e-9, 6e-9]
if sim_name.startswith("57-"):
return [4e-9, 6e-9]
if sim_name.startswith("58-"):
return [4e-9, 6e-9]
if sim_name.startswith("59-buf-inner_input-"):
return [2e-9, 4e-9]
if sim_name.startswith("59-buf-edge_input-"):
return [4e-9, 6e-9]
if sim_name.startswith("60-"):
return [4e-9, 6e-9]
if sim_name.startswith("61-"):
return [4e-9, 6e-9]
## USER-FACING FUNCTIONS
def read_db(name_filter=lambda name: True, min_meas: int=0) -> dict:
return {
name: meas for (name, meas) in DB.items()
if name_filter(name) \
and meas.num_runs() >= min_meas
}
def update_db():
db = compute_db()
dump("stacked_cores_52xx_db.py", db)
## IMPLEMENTATION DETAILS
def compute_db():
here, _ = os.path.split(__file__)
toplevel_out = f"{here}/../../../../out/applications/stacked_cores"
stems = extract_stems(os.listdir(toplevel_out))
return {
s: extract_parameterized_meas(os.path.join(toplevel_out, s), times_of_interest(s))
for s in stems
}
def extract_stems(dirlist: list) -> list:
stems = set()
TERM = "-drive-"
for d in dirlist:
print(d)
header = d.split('-')[0]
if header not in PREFIXES: continue
if TERM not in d: continue
stem = d[:d.find(TERM) + len(TERM)]
stems.add(stem)
return stems
def dump(path: str, db: dict):
with open(path, "w") as f:
f.write("from extract_meas import MeasRow, ParameterizedMeas\n\n")
f.write("DB = {")
for k, v in natsorted(db.items()):
f.write(indented(f"\n{k!r}: {v},"))
f.write("\n}")
if __name__ == '__main__': update_db()
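For illustration, the stem extraction performed by `extract_stems` above can be exercised standalone. This sketch copies its logic with a reduced `PREFIXES` set; the directory names below are invented:

```python
PREFIXES = { "52", "53" }  # reduced set, for the example
TERM = "-drive-"

def extract_stems(dirlist: list) -> set:
    # keep everything up to and including "-drive-" for matching prefixes
    stems = set()
    for d in dirlist:
        header = d.split('-')[0]
        if header not in PREFIXES: continue
        if TERM not in d: continue
        stems.add(d[:d.find(TERM) + len(TERM)])
    return stems

dirs = [
    "52-or-drive-1e10",   # kept: prefix "52", contains "-drive-"
    "52-or-drive-2e10",   # collapses into the same stem as the line above
    "53-buf-drive-5e10",  # kept: prefix "53"
    "53-buf-meas",        # skipped: no "-drive-"
    "99-xyz-drive-1e10",  # skipped: prefix not in PREFIXES
]
print(sorted(extract_stems(dirs)))
# → ['52-or-drive-', '53-buf-drive-']
```

Each stem then names one parameterized measurement family, regardless of how many drive strengths were simulated under it.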

File diff suppressed because it is too large


@@ -0,0 +1,176 @@
from math import sqrt
import plotly.express as px
from pandas import DataFrame
import scipy.optimize as opt
unit_to_m = lambda u: -18000 + 36000 * u
sweep_1d = lambda points=101: [unit_to_m(x/(points-1)) for x in range(points)]
def plot(name: str, x_name: str, y_series: list):
""" plot y(x), where y values are specified by `y_series` and x is inferred """
df = DataFrame(data={ x_name: sweep_1d(len(y_series)), "y": y_series })
fig = px.line(df, x=x_name, y="y", title=name)
fig.show()
def plot_slope(name: str, x_name: str, y_series: list):
slope = extract_slope(y_series)
plot(name, x_name, slope)
def extract_slope(y_series: list):
dist = 2 * 36000 / (len(y_series) - 1)
known = [ (next - prev)/dist for (prev, next) in zip(y_series[:-2], y_series[2:]) ]
return [known[0]] + known + [known[-1]]
def eval_series(meas: 'ParameterizedMeas', points: list, extract_tx, y_idx: int = -1) -> list:
"""
extract a list of y-value floats from `meas`.
each x value is a tuple of desired M values at which to sample the curve.
e.g. points = [ (None, 1000.0, 2000.0, None) ] samples at M1=1000.0, M2=2000.0,
treating M0 and M3 as dependent values.
`y_idx` specifies which M value should be treated as the dependent value to be computed.
e.g. `y_idx=0` to compute M0.
`extract_tx` is a function mapping one run (list[list[float]] of M values)
to a measured point of the transfer function. e.g. [15000, -15000, 14000] for a 3-core OR gate.
"""
return [sample_all(meas, p, extract_tx)[y_idx] for p in points]
def sample_all(meas: 'ParameterizedMeas', at: tuple, extract_tx) -> tuple:
"""
computes the interpolated M values at the provided `at` coordinate;
effectively filling in whichever items of `at` are left as `None`
"""
runs = [extract_tx(r) for r in meas.runs()]
distances = [(distance_to_sq(m, at), m) for m in runs]
# interpolated = weighted_sum_of_neighbors_by_inv_distance(distances)
interpolated = interpolate_minl1(at, runs, distances)
print(at, interpolated)
return interpolated
def extract_52xx_tx(meas_rows: list) -> tuple:
"""
extracts a flat list of input/output M mappings from a 52xx run
"""
return (meas_rows[0].m[0], meas_rows[0].m[1], meas_rows[1].m[2], meas_rows[2].m[3])
def interpolate_minl1(at: tuple, runs: list, distances: list) -> tuple:
# let R = `runs`, A = `at`, D = `distances`, x be the weight of each run
# such that the result is R0 x0 + R1 x1 + ...
#
# solve for x
# subject to R0 x0 + R1 x1 + ... = A for the elements of A != None
# minimize D0 x0 + D1 x1 + ...
#
# relevant scipy docs:
# - <https://docs.scipy.org/doc/scipy/tutorial/optimize.html#trust-region-constrained-algorithm-method-trust-constr>
# - <https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.optimize.minimize.html>
fixed_coords = [(i, a) for (i, a) in enumerate(at) if a is not None]
num_fixed_coords = len(fixed_coords)
num_runs = len(runs)
# create a matrix E, where E*x = A for Ai != None
eq_constraints = [[0]*num_runs for _ in range(num_fixed_coords)]
for run_idx, run in enumerate(runs):
for constraint_idx, (coord_idx, _a) in enumerate(fixed_coords):
eq_constraints[constraint_idx][run_idx] = run[coord_idx]
eq_rhs = [a for (i, a) in fixed_coords]
eq_constraint = opt.LinearConstraint(eq_constraints, eq_rhs, eq_rhs)
# constrain the sum of weights to be 1.0
weights_sum_to_1_constraint = opt.LinearConstraint([[1] * num_runs], [1], [1])
# constrain the weights to be positive
bounds = opt.Bounds([0]*num_runs, [float("Inf")]*num_runs)
def score(weights: list) -> float:
# function to minimize: D0 x0 + D1 x1 + ...
return sum(w*d[0] for w, d in zip(weights, distances))
# compute the weight of each run
init = [0]*num_runs
constraints = [eq_constraint, weights_sum_to_1_constraint]
try:
res = opt.minimize(score, init, method='trust-constr', constraints=constraints, bounds=bounds)
except ValueError as e:
print(f"failed to interpolate point {e}")
return [0] * len(at)
run_weights = res.x
# sum the weighted runs
return element_sum([weighted(run, weight) for run, weight in zip(runs, run_weights)])
def interpolate(meas: 'ParameterizedMeas', a0: float, a1: float) -> tuple:
"""
this interpolates a point among four neighboring points in 2d.
the implementation only supports 2d, but the technique is extendable to N dim.
"""
rows = [r.m for r in meas.all_rows()]
distances = [(distance_to(m, (a0, a1)), m) for m in rows]
# a0_below_dist, a0_below_val = min(d for d in distances if d[1][0] <= a0)
# a0_above_dist, a0_above_val = min(d for d in distances if d[1][0] >= a0)
# a1_below_dist, a1_below_val = min(d for d in distances if d[1][1] <= a1)
# a1_above_dist, a1_above_val = min(d for d in distances if d[1][1] >= a1)
a0_below = min((d for d in distances if d[1][0] <= a0), default=None)
a0_above = min((d for d in distances if d[1][0] >= a0), default=None)
a1_below = min((d for d in distances if d[1][1] <= a1), default=None)
a1_above = min((d for d in distances if d[1][1] >= a1), default=None)
neighbors = [a for a in [a0_below, a0_above, a1_below, a1_above] if a is not None]
return weighted_sum_of_neighbors_by_inv_distance(neighbors)
def weighted_sum_of_neighbors_by_inv_distance(neighbors: list) -> tuple:
"""
each neighbor is (distance, value).
return a weighted sum of these neighbors, where lower-distance neighbors are more strongly weighted.
"""
D = sum(a[0] for a in neighbors)
weight_n = lambda n: 1/max(n[0], 1e-3) # non-normalized weight for neighbor
W = sum(weight_n(n) for n in neighbors)
weighted_n = lambda n: weighted(n[1], weight_n(n)/W) # normalized weighted contribution for neighbor
return element_sum([weighted_n(n) for n in neighbors])
def weighted_sum_of_neighbors(neighbors: list) -> tuple:
"""
each neighbor is (distance, value).
return a weighted sum of these neighbors, where lower-distance neighbors are more strongly weighted.
"""
D = sum(a[0] for a in neighbors)
weight_n = lambda n: D - n[0] # non-normalized weight for neighbor
W = sum(weight_n(n) for n in neighbors)
weighted_n = lambda n: weighted(n[1], weight_n(n)/W) # normalized weighted contribution for neighbor
return element_sum([weighted_n(n) for n in neighbors])
def distance_to(p0: tuple, p1: tuple) -> float:
"""
return the L2-norm distance from p0 to p1.
any coordinates set to `None` are ignored.
e.g. `distance_to((1, 2, 3), (None, 4, 5))` is the same as `distance_to((2, 3), (4, 5))`
"""
return sqrt(distance_to_sq(p0, p1))
def distance_to_sq(p0: tuple, p1: tuple) -> float:
return sum((x0-x1)*(x0-x1) for (x0, x1) in zip(p0, p1) if x0 is not None and x1 is not None)
def element_sum(lists: list) -> list:
"""
given a list[list[float]] where each inner list has the same length,
returns a list[float] by summing along each axis.
e.g. element_sum([[1, 2], [3, 4], [5, 6]]) gives `[1+3+5, 2+4+6]`
"""
elems = list(lists[0])  # copy, to avoid mutating the caller's first list
for l in lists[1:]:
for i, e in enumerate(l):
elems[i] += e
return elems
def weighted(l: list, scale: float) -> list:
"""
given list[float], returns a new list[float] where each element is multiplied by `scale`
"""
return [e*scale for e in l]
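As a quick sanity check of the helpers above, here is a standalone sketch. It copies the `distance_to` and `weighted_sum_of_neighbors_by_inv_distance` logic so it can run without the rest of the file; the sample neighbors are made up:

```python
from math import sqrt

def distance_to(p0, p1):
    # L2 distance, ignoring any coordinate that is None
    return sqrt(sum((x0-x1)**2 for (x0, x1) in zip(p0, p1)
                    if x0 is not None and x1 is not None))

def weighted_sum_of_neighbors_by_inv_distance(neighbors):
    # each neighbor is (distance, value); weight each by 1/distance, then normalize
    weight_n = lambda n: 1/max(n[0], 1e-3)
    W = sum(weight_n(n) for n in neighbors)
    return [sum(n[1][i]*weight_n(n)/W for n in neighbors)
            for i in range(len(neighbors[0][1]))]

# None coordinates are skipped, so these two distances agree:
assert distance_to((1, 2, 3), (None, 4, 5)) == distance_to((2, 3), (4, 5))

# a neighbor at distance 1 gets 3x the weight of a neighbor at distance 3,
# i.e. normalized weights 0.75 and 0.25:
out = weighted_sum_of_neighbors_by_inv_distance([(1.0, [0.0, 0.0]), (3.0, [4.0, 8.0])])
# out ≈ [0.25*4, 0.25*8] = [1.0, 2.0]
assert all(abs(a - b) < 1e-9 for a, b in zip(out, [1.0, 2.0]))
```

The `max(n[0], 1e-3)` clamp keeps the weight finite when a query lands exactly on a measured run.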


@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 8xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_8xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_start = last_row_before_t(rows, 2e-9)
tx_end = last_row_before_t(rows, 3e-9)
noop_start = last_row_before_t(rows, 5e-9)
noop_end = last_row_before_t(rows, 6e-9)
m_tx_start = extract_m(tx_start)
m_tx_end = extract_m(tx_end)
m_noop_start = extract_m(noop_start)
m_noop_end = extract_m(noop_end)
num_m = len(m_tx_start)
m0_switch = abs(m_tx_end[0] - m_tx_start[0])
m0_noop_switch = abs(m_noop_end[0] - m_noop_start[0])
m_tx_switch_arr = [round(abs(m_tx_end[i] - m_tx_start[i])) for i in range(1, num_m)]
m_tx_switch = sum(m_tx_switch_arr)
m_noop_switch_arr = [round(abs(m_noop_end[i] - m_noop_start[i])) for i in range(1, num_m)]
m_noop_switch = sum(m_noop_switch_arr)
ratio_tx_noop = m_tx_switch / m_noop_switch
ratio_tx_switch = m_tx_switch / m0_switch
ratio_noop_switch = m_noop_switch / m0_switch
ratio_tx_noop_switch = (m_tx_switch - m_noop_switch) / m0_switch
print(f'm0 tx: {m0_switch} ({m_tx_start[0]} -> {m_tx_end[0]})')
print(f'm0 noop: {m0_noop_switch} ({m_noop_start[0]} -> {m_noop_end[0]})')
print('')
print(f'm(tx): {m_tx_start}')
print(f' -> {m_tx_end}')
print('')
print(f'm(noop): {m_noop_start}')
print(f' -> {m_noop_end}')
print('')
print(f'switched(tx): {m_tx_switch_arr}')
print(f'switched(noop): {m_noop_switch_arr}')
print('')
print(f'tx/noop: {ratio_tx_noop:.3}')
print(f'tx/m0: {ratio_tx_switch:.3}')
print(f'noop/m0: {ratio_noop_switch:.3}')
print(f'(tx-noop)/m0: {ratio_tx_noop_switch:.3}')
if __name__ == '__main__':
extract_8xx(sys.argv[1])
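The ratio arithmetic in `extract_8xx` can be checked against synthetic data. This is only a sketch: the `m_*` vectors below are invented, while the real script reads them out of a meas.csv via the `stacked_cores` helpers:

```python
# invented M values: index 0 is the output core, the rest are input cores
m_tx_start,   m_tx_end   = [100.0, -15000.0, -15000.0], [-14000.0, 14000.0, 13500.0]
m_noop_start, m_noop_end = [200.0, -15000.0, -15000.0], [150.0, -14900.0, -14800.0]

num_m = len(m_tx_start)
m0_switch = abs(m_tx_end[0] - m_tx_start[0])  # output-core switching during tx
m_tx_switch = sum(round(abs(m_tx_end[i] - m_tx_start[i])) for i in range(1, num_m))
m_noop_switch = sum(round(abs(m_noop_end[i] - m_noop_start[i])) for i in range(1, num_m))

# a "good" transfer switches the inputs hard during tx and barely at all during noop
print(f'tx/noop: {m_tx_switch / m_noop_switch:.3}')  # 57500/300
print(f'tx/m0:   {m_tx_switch / m0_switch:.3}')      # 57500/14100
```

A large tx/noop ratio with a modest tx/m0 ratio is the signature the real script is looking for.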


@@ -0,0 +1,48 @@
#!/usr/bin/env python3
"""
invoke with the path to a meas.csv file for the stacked_core 9xx demos
to extract higher-level info from them.
"""
import sys
from stacked_cores import load_csv, labeled_rows, last_row_before_t, extract_m
def extract_9xx(path: str):
header, raw_rows = load_csv(path)
rows = labeled_rows(header, raw_rows)
tx_start = last_row_before_t(rows, 2e-9)
tx_end = last_row_before_t(rows, 4e-9)
rev_start = last_row_before_t(rows, 4e-9)
rev_end = last_row_before_t(rows, 6e-9)
m_tx_start = extract_m(tx_start)
m_tx_end = extract_m(tx_end)
m_rev_start = extract_m(rev_start)
m_rev_end = extract_m(rev_end)
m1_switch = abs(m_tx_end[1] - m_tx_start[1])
m1_rev_switch = abs(m_rev_end[1] - m_rev_start[1])
m_tx_switch_arr = [round(abs(m_tx_end[i] - m_tx_start[i])) for i in [0, 2]]
m_tx_switch = sum(m_tx_switch_arr)
m_rev_switch_arr = [round(abs(m_rev_end[i] - m_rev_start[i])) for i in [0, 2]]
m_rev_switch = sum(m_rev_switch_arr)
ratio_tx_switch = m_tx_switch / m1_switch
ratio_roundtrip = m1_rev_switch / m1_switch
print(f'm1 tx: {m1_switch} ({m_tx_start[1]} -> {m_tx_end[1]})')
print(f'm1 rev: {m1_rev_switch} ({m_rev_start[1]} -> {m_rev_end[1]})')
print('')
print(f'm(tx): {m_tx_start}')
print(f' -> {m_tx_end}')
print('')
print(f'm(rev): {m_rev_start}')
print(f' -> {m_rev_end}')
print('')
print(f'tx/m1: {ratio_tx_switch:.3}')
print(f'rev/m1: {ratio_roundtrip:.3}')
if __name__ == '__main__':
extract_9xx(sys.argv[1])

File diff suppressed because it is too large


@@ -7,10 +7,14 @@
 //! with something that absorbs energy. since this example doesn't, it lets you see what
 //! happens when you just use the default boundary conditions.
-use coremem::{mat, driver};
-use coremem::geom::{Coord as _, Cube, Index, Vec3};
+use coremem::{mat, Driver};
+use coremem::geom::{Coord as _, Cube, Index};
 use coremem::units::Seconds;
-use coremem::stim::{Stimulus, TimeVarying as _, UniformStimulus};
+use coremem::sim::spirv::{self, SpirvSim};
+use coremem::stim::{Fields, ModulatedVectorField, Pulse, RegionGated};
+use coremem::cross::vec::Vec3;
+type Mat = mat::GenericMaterial<f32>;
 fn main() {
     coremem::init_logging();
@@ -21,14 +25,13 @@ fn main() {
     // each cell represents 1um x 1um x 1um volume
     let feature_size = 1e-6;
-    // Create the simulation "driver" which uses the CPU as backend.
-    // by default all the computations are done with R32: a f32 which panics on NaN/Inf
-    // you can parameterize it to use R64, or unchecked f32 -- see src/driver.rs for the definition
-    let mut driver: driver::CpuDriver = driver::Driver::new(size, feature_size);
-    // uncomment to use the Spirv/GPU driver. this one is restricted to unchecked f32.
-    // note: this won't have better perf unless you reduce the y4m/term renderer framerate below.
-    // let mut driver: driver::SpirvDriver = driver::Driver::new_spirv(size, feature_size);
+    // create the simulation "driver".
+    // the first parameter is the float type to use: f32 for unchecked math, coremem::real::R32
+    // to guard against NaN/Inf (useful for debugging).
+    // to run this on the gpu instead of the cpu, replace `CpuBackend` with `WgpuBackend`.
+    let mut driver = Driver::new(SpirvSim::<f32, Mat, spirv::CpuBackend>::new(
+        size, feature_size
+    ));
     // create a conductor on the left side.
     let conductor = Cube::new(
@@ -43,12 +46,12 @@ fn main() {
         Index::new(201, height*3/4, 1).to_meters(feature_size),
     );
     // emit a constant E/H delta over this region for 100 femtoseconds
-    let stim = Stimulus::new(
-        center_region,
-        UniformStimulus::new(
-            Vec3::new(2e19, 0.0, 0.0), // E field (per second)
-            Vec3::new(0.0, 0.0, 2e19/376.730) // H field (per second)
-        ).gated(0.0, 100e-15),
-    );
+    let stim = ModulatedVectorField::new(
+        RegionGated::new(center_region, Fields::new_eh(
+            Vec3::new(2e19, 0.0, 0.0),
+            Vec3::new(0.0, 0.0, 2e19/376.730),
+        )),
+        Pulse::new(0.0, 100e-15),
+    );
     driver.add_stimulus(stim);


@@ -12,19 +12,16 @@ crate-type = ["lib"]
 [dependencies]
 bincode = "1.3" # MIT
 common_macros = "0.1" # MIT or Apache 2.0
-crossbeam = "0.8" # MIT or Apache 2.0
 crossterm = "0.24" # MIT
 csv = "1.1" # MIT or Unlicense
 dashmap = "5.3" # MIT
-dyn-clone = "1.0" # MIT or Apache 2.0
-enum_dispatch = "0.3" # MIT or Apache 2.0
 env_logger = "0.9" # MIT or Apache 2.0
-float_eq = "1.0" # MIT or Apache 2.0
 font8x8 = "0.3" # MIT
 futures = "0.3" # MIT or Apache 2.0
 image = "0.24" # MIT
 imageproc = "0.23" # MIT
 indexmap = "1.9" # MIT or Apache 2.0
-lazy_static = "1.4" # MIT or Apache 2.0
 log = "0.4" # MIT or Apache 2.0
 more-asserts = "0.3" # CC0-1.0
 ndarray = { version = "0.15", features = ["rayon", "serde"] } # MIT or Apache 2.0
@@ -33,8 +30,6 @@ num = "0.4" # MIT or Apache 2.0
 rand = "0.8" # MIT or Apache 2.0
 rayon = "1.5" # MIT or Apache 2.0
 serde = "1.0" # MIT or Apache 2.0
-threadpool = "1.8" # MIT or Apache 2.0
-typetag = "0.2" # MIT or Apache 2.0
 y4m = "0.7" # MIT
 wgpu = "0.12"
@@ -42,16 +37,17 @@ wgpu = "0.12"
 # TODO: update to 0.13
 # wgpu = { version = "0.13", features = ["spirv", "vulkan-portability"] } # MIT or Apache 2.0
 # spirv-* is MIT or Apache 2.0
-spirv-builder = { git = "https://github.com/EmbarkStudios/rust-gpu", features = ["use-compiled-tools"] }
-spirv-std = { git = "https://github.com/EmbarkStudios/rust-gpu" }
-spirv-std-macros = { git = "https://github.com/EmbarkStudios/rust-gpu" }
+spirv-builder = { git = "https://github.com/Rust-GPU/rust-gpu", rev = "d78c301799e9d254aab3156a230c9a59efd94122", features = ["use-compiled-tools"] }
+spirv-std = { git = "https://github.com/Rust-GPU/rust-gpu", rev = "d78c301799e9d254aab3156a230c9a59efd94122" }
+spirv-std-macros = { git = "https://github.com/Rust-GPU/rust-gpu", rev = "d78c301799e9d254aab3156a230c9a59efd94122" }
 spirv_backend = { path = "../spirv_backend" }
 spirv_backend_runner = { path = "../spirv_backend_runner" }
-coremem_types = { path = "../types", features = ["fmt", "serde"] }
+coremem_cross = { path = "../cross", features = ["iter", "fmt", "serde", "std"] }
 [dev-dependencies]
 criterion = "0.3"
+float_eq = "1.0" # MIT or Apache 2.0
 [[bench]]
 name = "driver"


@@ -1,27 +1,16 @@
-use coremem::{Driver, SimState, SpirvDriver};
+use coremem::Driver;
 use coremem::geom::Index;
-use coremem::mat::{Ferroxcube3R1MH, IsoConductorOr, GenericMaterial, GenericMaterialNoPml, GenericMaterialOneField};
-use coremem::real::R32;
-use coremem::sim::spirv::{self, SpirvSim};
+use coremem::mat::{Ferroxcube3R1MH, IsoConductorOr, GenericMaterial};
+use coremem::sim::spirv::{SpirvSim, WgpuBackend};
 use criterion::{BenchmarkId, criterion_group, criterion_main, Criterion};
-type DefaultDriver = Driver::<SimState<R32, GenericMaterial<R32>>>;
-pub fn bench_step(c: &mut Criterion) {
-    for size in &[10, 20, 40, 80, 160] {
-        let sim = SimState::<R32, GenericMaterial<R32>>::new(Index::new(*size, *size, *size), 1e-5);
-        c.bench_with_input(BenchmarkId::new("Driver::step", size), &sim, |b, sim| {
-            let mut driver = Driver::new_with_state(sim.clone());
-            b.iter(|| driver.step())
-        });
-    }
-}
 pub fn bench_step_spirv(c: &mut Criterion) {
+    type Mat = GenericMaterial<f32>;
     for size in &[10, 20, 40, 80, 160] {
-        let sim: SpirvSim = SpirvSim::new(Index::new(*size, *size, *size), 1e-5);
+        let sim = SpirvSim::<f32, Mat, WgpuBackend>::new(Index::new(*size, *size, *size), 1e-5);
         c.bench_with_input(BenchmarkId::new("Driver::step_spirv", size), &sim, |b, sim| {
-            let mut driver = Driver::new_with_state(sim.clone());
+            let mut driver = Driver::new(sim.clone());
             b.iter(|| driver.step())
         });
     }
@@ -30,42 +19,13 @@ pub fn bench_step_spirv(c: &mut Criterion) {
 pub fn bench_step_spirv_iso_3r1(c: &mut Criterion) {
     type Mat = IsoConductorOr<f32, Ferroxcube3R1MH>;
     for size in &[10, 20, 40, 80, 160] {
-        let sim: SpirvSim<Mat> = SpirvSim::new(Index::new(*size, *size, *size), 1e-5);
+        let sim = SpirvSim::<f32, Mat, WgpuBackend>::new(Index::new(*size, *size, *size), 1e-5);
         c.bench_with_input(BenchmarkId::new("Driver::spirv_ISO3R1", size), &sim, |b, sim| {
-            let mut driver: SpirvDriver<Mat> = Driver::new_with_state(sim.clone());
+            let mut driver = Driver::new(sim.clone());
             b.iter(|| driver.step())
         });
     }
 }
-// pub fn bench_step_no_pml(c: &mut Criterion) {
-//     for size in &[10, 20, 40, 80, 160] {
-//         c.bench_with_input(BenchmarkId::new("Driver::step_no_pml", size), size, |b, &size| {
-//             let mut driver = DefaultDriver::new(Index::new(size, size, size), 1e-5);
-//             b.iter(|| driver.step())
-//         });
-//     }
-// }
-//
-// pub fn bench_step_one_vec(c: &mut Criterion) {
-//     for size in &[10, 20, 40, 80, 160] {
-//         c.bench_with_input(BenchmarkId::new("Driver::step_one_vec", size), size, |b, &size| {
-//             let mut driver = DefaultDriver::new(Index::new(size, size, size), 1e-5);
-//             b.iter(|| driver.step())
-//         });
-//     }
-// }
-//
-// pub fn bench_step_with_pml(c: &mut Criterion) {
-//     let size = 40;
-//     for thickness in &[0, 1, 2, 4, 8, 16] {
-//         c.bench_with_input(BenchmarkId::new("Driver::step_with_pml", thickness), thickness, |b, &thickness| {
-//             let mut driver = DefaultDriver::new(Index::new(size, size, size), 1e-5);
-//             driver.add_pml_boundary(Index::new(thickness, thickness, thickness));
-//             b.iter(|| driver.step())
-//         });
-//     }
-// }
-criterion_group!(benches, /*bench_step,*/ bench_step_spirv, bench_step_spirv_iso_3r1);
+criterion_group!(benches, bench_step_spirv, bench_step_spirv_iso_3r1);
 criterion_main!(benches);


@@ -103,3 +103,15 @@ Driver::spirv_ISO3R1/20 time: [548.70 us 555.85 us 563.28 us]
Driver::spirv_ISO3R1/40 time: [1.5333 ms 1.5405 ms 1.5489 ms]
Driver::spirv_ISO3R1/80 time: [13.299 ms 13.335 ms 13.376 ms]
Driver::spirv_ISO3R1/160time: [164.57 ms 164.74 ms 164.93 ms]
5a0766451d96835061a674ab94f00341adb2b187:
Driver::step_spirv/10 time: [590.26 us 600.42 us 613.28 us]
Driver::step_spirv/20 time: [870.49 us 884.81 us 902.21 us]
Driver::step_spirv/40 time: [3.4094 ms 3.4285 ms 3.4498 ms]
Driver::step_spirv/80 time: [35.488 ms 35.673 ms 35.922 ms]
Driver::step_spirv/160 time: [270.98 ms 271.19 ms 271.43 ms]
Driver::spirv_ISO3R1/10 time: [585.57 us 596.11 us 608.79 us]
Driver::spirv_ISO3R1/20 time: [826.63 us 841.79 us 860.86 us]
Driver::spirv_ISO3R1/40 time: [2.8808 ms 2.9004 ms 2.9237 ms]
Driver::spirv_ISO3R1/80 time: [28.955 ms 29.027 ms 29.115 ms]
Driver::spirv_ISO3R1/160time: [216.03 ms 216.22 ms 216.45 ms]


@@ -1,5 +1,7 @@
-use coremem::{self, Driver, GenericSim, SimState};
-use coremem::sim::spirv::SpirvSim;
+use coremem::{self, Driver, AbstractSim};
+use coremem::sim::spirv::{SpirvSim, WgpuBackend};
+use coremem::sim::units::Frame;
+use coremem::cross::mat::GenericMaterial;
 use coremem::geom::Index;
 use std::time::{Instant, Duration};
@@ -16,19 +18,23 @@ fn measure<F: FnMut()>(name: &str, n_times: u32, mut f: F) -> f32 {
     avg
 }
-fn measure_steps<S: GenericSim + Clone + Default + Send + Sync + 'static>(name: &str, steps_per_call: u32, mut d: Driver<S>) {
-    measure(name, 100/steps_per_call, || d.step_multiple(steps_per_call));
+fn measure_steps<S: AbstractSim + Clone + Default + Send + 'static>(name: &str, steps_per_call: u32, mut d: Driver<S::Real, S>) {
+    measure(name, 100/steps_per_call, || d.step_until(Frame(steps_per_call.into())));
 }
 fn main() {
     coremem::init_logging();
-    measure_steps("spirv/80", 1, Driver::<SpirvSim>::new_spirv(Index::new(80, 80, 80), 1e-3));
-    measure_steps("sim/80", 1, Driver::<SimState>::new(Index::new(80, 80, 80), 1e-3));
-    measure_steps("spirv/80 step(2)", 2, Driver::<SpirvSim>::new_spirv(Index::new(80, 80, 80), 1e-3));
-    measure_steps("sim/80 step(2)", 2, Driver::<SimState>::new(Index::new(80, 80, 80), 1e-3));
-    measure_steps("spirv/80 step(10)", 10, Driver::<SpirvSim>::new_spirv(Index::new(80, 80, 80), 1e-3));
-    measure_steps("sim/80 step(10)", 10, Driver::<SimState>::new(Index::new(80, 80, 80), 1e-3));
-    measure_steps("spirv/80 step(100)", 100, Driver::<SpirvSim>::new_spirv(Index::new(80, 80, 80), 1e-3));
-    measure_steps("sim/80 step(100)", 100, Driver::<SimState>::new(Index::new(80, 80, 80), 1e-3));
+    measure_steps("spirv/80", 1, Driver::new(
+        SpirvSim::<f32, GenericMaterial<f32>, WgpuBackend>::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("spirv/80 step(2)", 2, Driver::new(
+        SpirvSim::<f32, GenericMaterial<f32>, WgpuBackend>::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("spirv/80 step(10)", 10, Driver::new(
+        SpirvSim::<f32, GenericMaterial<f32>, WgpuBackend>::new(Index::new(80, 80, 80), 1e-3)
+    ));
+    measure_steps("spirv/80 step(100)", 100, Driver::new(
+        SpirvSim::<f32, GenericMaterial<f32>, WgpuBackend>::new(Index::new(80, 80, 80), 1e-3)
+    ));
 }


@@ -1,496 +0,0 @@
use coremem::*;
use coremem::geom::*;
use coremem::real::{Real as _, ToFloat as _};
use coremem::stim::AbstractStimulus;
use rand::rngs::StdRng;
use rand::{Rng as _, SeedableRng as _};
fn energy<R: Region>(s: &dyn SampleableSim, reg: &R) -> f32 {
let e = f64::half() * s.map_sum_over_enumerated(reg, |_pos: Index, cell| {
cell.e().mag_sq().to_f64()
});
e.cast()
}
fn energy_now_and_then<R: Region>(state: &mut StaticSim, reg: &R, frames: u32) -> (f32, f32) {
let energy_0 = energy(state, reg);
for _ in 0..frames {
state.step();
}
let energy_1 = energy(state, reg);
(energy_0, energy_1)
}
struct PmlStim<F> {
/// Maps index -> (stim vector, stim frequency)
f: F,
t_end: f32,
feat_size: f32,
}
impl<F: Fn(Index) -> (Vec3<f32>, f32) + Sync> AbstractStimulus for PmlStim<F> {
fn at(&self, t_sec: f32, pos: Meters) -> (Vec3<f32>, Vec3<f32>) {
let angle = t_sec/self.t_end*f32::two_pi();
let gate = 0.5*(1.0 - angle.cos());
let (e, hz) = (self.f)(pos.to_index(self.feat_size));
let sig_angle = angle*hz;
let sig = sig_angle.sin();
(e*gate*sig, Vec3::zero())
}
}
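The gating in `PmlStim::at` above is a raised-cosine (Hann) envelope over the full stimulus duration, multiplied by a sinusoid that completes `hz` cycles in that window; the smooth on/off ramp avoids injecting sharp-edge harmonics. A standalone sketch of just that math (hypothetical code, not part of the crate):

```rust
use std::f32::consts::TAU;

// Hann-gated sinusoid, as in PmlStim::at above: `angle` sweeps 0..TAU over
// the stimulus window, the gate is 0 at both ends and 1 at the midpoint,
// and the carrier completes `hz` cycles over the window.
fn gated_signal(t: f32, t_end: f32, hz: f32) -> f32 {
    let angle = t / t_end * TAU;
    let gate = 0.5 * (1.0 - angle.cos());
    gate * (angle * hz).sin()
}

fn main() {
    let t_end = 1.0;
    // envelope vanishes at both edges of the stimulus window
    assert!(gated_signal(0.0, t_end, 2.0).abs() < 1e-6);
    assert!(gated_signal(t_end, t_end, 2.0).abs() < 1e-3);
    println!("quarter-window sample: {}", gated_signal(0.25, t_end, 1.0));
}
```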
/// Apply some stimulus, and then let it decay and measure the ratio of energy left in the system
fn apply_stim_full_interior<F>(state: &mut StaticSim, frames: u32, f: F)
where F: Fn(Index) -> (Vec3<f32>, f32) + Sync // returns (E vector, omega)
{
let stim = PmlStim {
f,
t_end: (frames as f32) * state.timestep(),
feat_size: state.feature_size(),
};
for _t in 0..frames {
state.apply_stimulus(&stim);
state.step();
}
}
fn apply_stim_over_region<R, F>(state: &mut StaticSim, frames: u32, reg: R, f: F)
where
R: Region,
F: Fn(Index) -> (Vec3<f32>, f32) + Sync,
{
let feat = state.feature_size();
apply_stim_full_interior(state, frames, |idx| {
if reg.contains(idx.to_meters(feat)) {
f(idx)
} else {
(Vec3::zero(), 0.0)
}
});
}
/// Stimulate each point in the region with a pseudorandom (but predictable) e wave
fn apply_chaotic_stim_over_region<R: Region>(state: &mut StaticSim, frames: u32, interior: R) {
apply_stim_over_region(state, frames, interior, |idx| {
let seed = (idx.x() as u64) ^ ((idx.y() as u64) << 16) ^ ((idx.z() as u64) << 32);
let mut rng = StdRng::seed_from_u64(seed);
let dir = Vec3::new(
rng.gen_range(-1.0..1.0),
rng.gen_range(-1.0..1.0),
rng.gen_range(-1.0..1.0),
);
// XXX only works if it's a whole number. I suppose this makes sense though, as
// other numbers would create higher harmonics when gated.
let hz = rng.gen_range(0..=2);
(dir, hz as _)
})
}
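The seeding trick above (folding the cell index into a 64-bit seed via `x ^ y<<16 ^ z<<32`) gives each cell an independent but fully reproducible stimulus, regardless of iteration order. A dependency-free sketch of the same idea (the original uses rand's `StdRng`; a tiny LCG stands in here, so this is illustrative, not the repo's generator):

```rust
// Fold a 3D cell index into a deterministic per-cell seed, as above.
fn seed_for(x: u64, y: u64, z: u64) -> u64 {
    x ^ (y << 16) ^ (z << 32)
}

// Minimal LCG stand-in for StdRng (Knuth/MMIX constants).
struct Lcg(u64);
impl Lcg {
    // advance the state and map the high 24 bits into [-1, 1)
    fn next_f32(&mut self) -> f32 {
        self.0 = self.0
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        ((self.0 >> 40) as f32) / ((1u64 << 24) as f32) * 2.0 - 1.0
    }
}

fn main() {
    // identical cells reproduce identical stimuli, independent of visit order
    let mut a = Lcg(seed_for(3, 5, 7));
    let mut b = Lcg(seed_for(3, 5, 7));
    assert_eq!(a.next_f32(), b.next_f32());
    println!("dir.x for cell (3,5,7): {}", a.next_f32());
}
```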
fn chaotic_pml_test(state: &mut StaticSim, boundary: u32, padding: u32, frames: u32) -> f32 {
let feat = state.feature_size();
{
let upper_left_idx = Index::unit()*padding;
let lower_right_idx = state.size() - Index::unit() - upper_left_idx;
let interior = Cube::new(upper_left_idx.to_meters(feat), lower_right_idx.to_meters(feat));
apply_chaotic_stim_over_region(state, frames, interior);
}
let upper_left_idx = Index::unit()*boundary;
let lower_right_idx = state.size() - Index::unit() - upper_left_idx;
let sim_region = Cube::new(upper_left_idx.to_meters(feat), lower_right_idx.to_meters(feat));
let (energy_0, energy_1) = energy_now_and_then(state, &sim_region, frames);
// println!("Energy: {}/{}", energy_1, energy_0);
energy_1/energy_0
}
#[allow(unused)]
fn state_for_pml(size: Index, boundary: Index, feat_size: f32, sc_coeff: f32, cond_coeff: f32, pow: f32) -> StaticSim {
let mut state = StaticSim::new(size, feat_size);
let timestep = state.timestep();
state.fill_boundary_using(boundary, |boundary_ness| {
let b = boundary_ness.elem_pow(pow);
let coord_stretch = b * sc_coeff / timestep;
let conductivity = Vec3::unit() * (b.mag() * cond_coeff / timestep);
unimplemented!();
Static::default()
// Static {
// // TODO PML coord_stretch,
// conductivity,
// pml: Some((PmlState::new(), PmlParameters::new(coord_stretch))),
// ..Default::default()
// }
});
state
}
fn main() {
// Explanation here (slide 63): https://empossible.net/wp-content/uploads/2020/01/Lecture-The-Perfectly-Matched-Layer.pdf
// Claims that eps0/delta_t * n^3 is a good way to activate the PML layer
for pow in vec![3.0] {
for sc_coeff in vec![
// 0.1*consts::EPS0,
// 0.5*consts::EPS0,
// 1e0*consts::EPS0,
// 1e1*consts::EPS0,
// 1e2*consts::EPS0,
// 1e3*consts::EPS0,
// 1e6*consts::EPS0,
// 1e9*consts::EPS0,
// 1e12*consts::EPS0,
// 1e15*consts::EPS0,
// 1e18*consts::EPS0,
// 1e21*consts::EPS0,
// 0.01,
// 0.03,
// 0.05,
// 0.07,
// 0.08,
// 0.09,
// 0.1,
// 0.15,
// 0.2,
// 0.25,
// 0.3,
// 0.4,
// 0.5,
// 0.6,
// 0.7,
// 0.8,
// 0.9,
//0.95,
// 1.0,
// 1.5,
// 2.0,
// 3.0,
// 5.0,
// 10.0,
// 100.0,
// 1000.0,
//0.0,
0.5,
] {
//for cond_coeff in vec![0.0, 1.0, 1e3, 1e6, 1e9] {
for cond_coeff in vec![0.0, 0.5*f32::eps0()] {
for frames in vec![400, 1200] {
for pml_width in vec![1, 2, 4, 8, 16] {
for feat_size in vec![1e-6] {
let size = Index::unit()*121;
let sim_inset = 40;
let boundary = Index::unit()*pml_width;
let mut state = state_for_pml(size, boundary, feat_size, sc_coeff, cond_coeff, pow);
let ratio = chaotic_pml_test(&mut state, pml_width, sim_inset, frames);
println!("{},{} (pow={}, f={}, width={}, frames={}): {}", sc_coeff, cond_coeff, pow, feat_size, pml_width, frames, ratio);
}
}
}
}
}
}
// Conclusions:
// * if coordinate stretching is divided by time_step, then the absorption is independent of
// feature size.
// For 240 frames, PML width of 20, size=121, sim_inset = 40, cubic onset:
// * coefficients between [0.4, 1.0] are all within 30% of each other
// * coefficients between [0.5, 1.0] are all within 20% of each other
// * coefficients > 1 show instability (i.e. unstable if coord_stretch > 1 / timestep);
// * absorption generally increases as the coefficient approaches 1.0 from the left.
// This begins to reverse at least by 0.98
// For 2400 frames, instability starts somewhere between 0.7 and 0.8
// 0.5 (f=0.000001, width=20, frames=2400): 0.0006807010986857421
// 0.6 (f=0.000001, width=20, frames=2400): 0.0005800974138738364
// 0.7 (f=0.000001, width=20, frames=2400): 0.0005092274390086559
// 0.8 (f=0.000001, width=20, frames=2400): 7326609898580109000000
// These numbers use hz = rng.gen_range(0..=5), with 20 frames of excitation and 20 steps
// between source and boundary
//
// Reducing hz to 0..=2 achieves much better perf (pow=3.0):
// 0.5 (f=0.000001, width=20, frames=2400): 0.0000007135657380376141
// 0.6 (f=0.000001, width=20, frames=2400): 0.0000006561934691721446
// 0.7 (f=0.000001, width=20, frames=2400): 0.0000006099334150762627
// 0.8 (f=0.000001, width=20, frames=2400): 2180868728529433400000
// Quadratic (pow=2.0):
// 0.4 (pow=2, f=0.000001, width=20, frames=2400): 0.0000006839243836328471
// 0.5 (pow=2, f=0.000001, width=20, frames=2400): 0.0000006189051555210391
// 0.6 (pow=2, f=0.000001, width=20, frames=2400): 0.9331062414894028
// 0.7 (pow=2, f=0.000001, width=20, frames=2400): 222818414359827930000000000000000000000000000000000000000000000000000000000000000000000
// Possibly better efficacy, but also less stable
// Linear (pow=1.0) is 100% unstable for at least coeff >= 0.5
// Subcubic (pow=2.5):
// 0.4 (pow=2.5, f=0.000001, width=20, frames=2400): 0.0000007333781495449756
// 0.5 (pow=2.5, f=0.000001, width=20, frames=2400): 0.0000006632381361190291
// 0.6 (pow=2.5, f=0.000001, width=20, frames=2400): 0.0000006089982278171636
// 0.7 (pow=2.5, f=0.000001, width=20, frames=2400): 7853952559.189004
// Superlinear (pow=1.5):
// 0.4 (pow=1.5, f=0.000001, width=20, frames=2400): 0.0000006502259131444893
// 0.5 (pow=1.5, f=0.000001, width=20, frames=2400): 0.0000005956833662247809
// 0.6 (pow=1.5, f=0.000001, width=20, frames=2400): 175615904583581400000000000000000000000000000000000000000000000000000000000000000000
// pow=4.0:
// 0.4 (pow=4, f=0.000001, width=20, frames=2400): 0.0000009105207758718423
// 0.5 (pow=4, f=0.000001, width=20, frames=2400): 0.0000008169331995553951
// 0.6 (pow=4, f=0.000001, width=20, frames=2400): 0.0000007525813705681166
// 0.7 (pow=4, f=0.000001, width=20, frames=2400): 0.0000007019565296008103
// 0.8 (pow=4, f=0.000001, width=20, frames=2400): 0.0000006600546205898854
// 0.9 (pow=4, f=0.000001, width=20, frames=2400): 0.0000006246047577819345
// 1 (pow=4, f=0.000001, width=20, frames=2400): 5437370254206590000000000000000000000000000000000
// pow=3.5:
// 0.4 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000008485707012112478
// 0.5 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000007653501316913873
// 0.6 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000007047188963430957
// 0.7 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000006561795324449849
// 0.8 (pow=3.5, f=0.000001, width=20, frames=2400): 0.0000006159621438553542
// 0.9 (pow=3.5, f=0.000001, width=20, frames=2400): 18913062838424785000000000000000000
// So generally lower powers = more absorption (makes sense: higher average coordinate
// stretching), but is more sensitive to error. All powers > 1.5 are stable at coeff=0.5.
// We don't know that much about reflection from just this data.
// Running over a smaller timeframe should give some suggestion about reflection
// 0.4 (pow=2, f=0.000001, width=20, frames=120): 0.2870021971493659
// 0.5 (pow=2, f=0.000001, width=20, frames=120): 0.2647114916517679 **
// 0.6 (pow=2, f=0.000001, width=20, frames=120): 0.24807341265362437
// 0.7 (pow=2, f=0.000001, width=20, frames=120): 0.2350081813369852
// 0.8 (pow=2, f=0.000001, width=20, frames=120): 0.22438565227661014
// 0.9 (pow=2, f=0.000001, width=20, frames=120): 0.21552516261496907
// 1.0 (pow=2, f=0.000001, width=20, frames=120): 0.20798702383197107
// 1.5 (pow=2, f=0.000001, width=20, frames=120): 0.39444643124947426
// 0.4 (pow=3, f=0.000001, width=20, frames=120): 0.3596522778473902
// 0.5 (pow=3, f=0.000001, width=20, frames=120): 0.33718409856439474
// 0.6 (pow=3, f=0.000001, width=20, frames=120): 0.32025414395335816
// 0.7 (pow=3, f=0.000001, width=20, frames=120): 0.3067653188201421 **
// 0.8 (pow=3, f=0.000001, width=20, frames=120): 0.2956250635204507
// 0.9 (pow=3, f=0.000001, width=20, frames=120): 0.2861895432992242
// 1.0 (pow=3, f=0.000001, width=20, frames=120): 0.27804573152917056
// 1.5 (pow=3, f=0.000001, width=20, frames=120): 0.24950413937515897
// 0.4 (pow=4, f=0.000001, width=20, frames=120): 0.41043160705678194
// 0.5 (pow=4, f=0.000001, width=20, frames=120): 0.38844802581052806
// 0.6 (pow=4, f=0.000001, width=20, frames=120): 0.3721037027266112
// 0.7 (pow=4, f=0.000001, width=20, frames=120): 0.35912262947037576
// 0.8 (pow=4, f=0.000001, width=20, frames=120): 0.34837556188585944
// 0.9 (pow=4, f=0.000001, width=20, frames=120): 0.33922665697709947 **
// 1 (pow=4, f=0.000001, width=20, frames=120): 0.33128127577349487
// 1.5 (pow=4, f=0.000001, width=20, frames=120): 0.30260541581006584
// Even smaller timeframe:
// 0.5 (pow=2, f=0.000001, width=20, frames=5): 1.0109467232603757
// 0.5 (pow=2, f=0.000001, width=20, frames=10): 1.0117049984675424
// 0.5 (pow=2, f=0.000001, width=20, frames=20): 1.015054295037722
// 0.5 (pow=2, f=0.000001, width=20, frames=30): 1.016127315217072
// 0.5 (pow=2, f=0.000001, width=20, frames=40): 1.0125450428623481
// 0.5 (pow=2, f=0.000001, width=20, frames=60): 0.9520114341681021
// 0.5 (pow=2, f=0.000001, width=20, frames=80): 0.7617289087518861
// 0.5 (pow=2, f=0.000001, width=20, frames=120): 0.2647114916517679
// 0.5 (pow=2, f=0.000001, width=20, frames=160): 0.018698432021826004
// 0.5 (pow=2, f=0.000001, width=20, frames=200): 0.001053437888276037
// 0.5 (pow=3, f=0.000001, width=20, frames=5): 1.0109467232603757
// 0.5 (pow=3, f=0.000001, width=20, frames=10): 1.0117049984674478
// 0.5 (pow=3, f=0.000001, width=20, frames=20): 1.0150542142468075
// 0.5 (pow=3, f=0.000001, width=20, frames=30): 1.0161755433699602
// 0.5 (pow=3, f=0.000001, width=20, frames=40): 1.0140118995944478
// 0.5 (pow=3, f=0.000001, width=20, frames=60): 0.9828724159361404
// 0.5 (pow=3, f=0.000001, width=20, frames=80): 0.8304056107875255
// 0.5 (pow=3, f=0.000001, width=20, frames=120): 0.33718409856439474
// 0.5 (pow=3, f=0.000001, width=20, frames=160): 0.033906877503334515
// 0.5 (pow=3, f=0.000001, width=20, frames=200): 0.0016075121270576818
// 0.5 (pow=4, f=0.000001, width=20, frames=5): 1.0109467232603757
// 0.5 (pow=4, f=0.000001, width=20, frames=10): 1.0117049984674424
// 0.5 (pow=4, f=0.000001, width=20, frames=20): 1.015054203569572
// 0.5 (pow=4, f=0.000001, width=20, frames=30): 1.0161827617273314
// 0.5 (pow=4, f=0.000001, width=20, frames=40): 1.014355067000562
// 0.5 (pow=4, f=0.000001, width=20, frames=60): 0.9971421502814346
// 0.5 (pow=4, f=0.000001, width=20, frames=80): 0.8718457443603458
// 0.5 (pow=4, f=0.000001, width=20, frames=120): 0.38844802581052806
// 0.5 (pow=4, f=0.000001, width=20, frames=160): 0.04879003869310861
// 0.5 (pow=4, f=0.000001, width=20, frames=200): 0.0025039595751290534
// That the numbers are all the same for t < 20 is not encouraging.
// Could be because of the courant number 0.577? No energy has reached the border yet?
// Why don't we query just the energy in the sim region -- not the boundary?
// After filtering to measure energy only in the sim region (note that some energy outside the
// sim region COULD be reflected back into the sim region later):
// 0.5 (pow=2, f=0.000001, width=20, frames=5): 1.0109467232603757
// 0.5 (pow=2, f=0.000001, width=20, frames=10): 1.0117049984565956
// 0.5 (pow=2, f=0.000001, width=20, frames=20): 1.0150051760172862
// 0.5 (pow=2, f=0.000001, width=20, frames=30): 1.0103422463150569
// 0.5 (pow=2, f=0.000001, width=20, frames=40): 0.9534926741875023
// 0.5 (pow=2, f=0.000001, width=20, frames=60): 0.7052236436305864
// 0.5 (pow=2, f=0.000001, width=20, frames=80): 0.42366103692252166
// 0.5 (pow=2, f=0.000001, width=20, frames=120): 0.047117482623349645
// 0.5 (pow=2, f=0.000001, width=20, frames=160): 0.0012788501490854916
// 0.5 (pow=2, f=0.000001, width=20, frames=200): 0.00025052555946608536
// 0.5 (pow=2, f=0.000001, width=20, frames=300): 0.000026163786700826282
// 0.5 (pow=2, f=0.000001, width=20, frames=400): 0.000009482696335385068
// 0.5 (pow=2, f=0.000001, width=20, frames=600): 0.0000034339203626681873
// 0.5 (pow=2, f=0.000001, width=20, frames=800): 0.0000016501593549063537
// 0.5 (pow=2, f=0.000001, width=20, frames=1200): 0.0000007097133927819365
// 0.5 (pow=2, f=0.000001, width=20, frames=1600): 0.0000004990355277480898
// 0.5 (pow=2, f=0.000001, width=20, frames=2400): 0.00000026541631012862064
// 0.5 (pow=3, f=0.000001, width=20, frames=5): 1.0109467232603757
// 0.5 (pow=3, f=0.000001, width=20, frames=10): 1.0117049984564943
// 0.5 (pow=3, f=0.000001, width=20, frames=20): 1.015005103609574
// 0.5 (pow=3, f=0.000001, width=20, frames=30): 1.010347173332254
// 0.5 (pow=3, f=0.000001, width=20, frames=40): 0.9534945122673749
// 0.5 (pow=3, f=0.000001, width=20, frames=60): 0.7052084303225945
// 0.5 (pow=3, f=0.000001, width=20, frames=80): 0.42365590263359737
// 0.5 (pow=3, f=0.000001, width=20, frames=120): 0.04711930602404218
// 0.5 (pow=3, f=0.000001, width=20, frames=160): 0.0013058690825149086
// 0.5 (pow=3, f=0.000001, width=20, frames=200): 0.00027124952640186995
// 0.5 (pow=3, f=0.000001, width=20, frames=300): 0.00003363606115185493
// 0.5 (pow=3, f=0.000001, width=20, frames=400): 0.000012979216745112146
// 0.5 (pow=3, f=0.000001, width=20, frames=600): 0.000004790890253991059
// 0.5 (pow=3, f=0.000001, width=20, frames=800): 0.0000018967555372300478
// 0.5 (pow=3, f=0.000001, width=20, frames=1200): 0.000000785458387440694
// 0.5 (pow=3, f=0.000001, width=20, frames=1600): 0.0000005657588895614973
// 0.5 (pow=3, f=0.000001, width=20, frames=2400): 0.0000002931443936827707
// 0.5 (pow=4, f=0.000001, width=20, frames=5): 1.0109467232603757
// 0.5 (pow=4, f=0.000001, width=20, frames=10): 1.011704998456489
// 0.5 (pow=4, f=0.000001, width=20, frames=20): 1.0150050988039427
// 0.5 (pow=4, f=0.000001, width=20, frames=30): 1.0103477261980267
// 0.5 (pow=4, f=0.000001, width=20, frames=40): 0.9534933477754826
// 0.5 (pow=4, f=0.000001, width=20, frames=60): 0.7052093991486525
// 0.5 (pow=4, f=0.000001, width=20, frames=80): 0.4236543609610083
// 0.5 (pow=4, f=0.000001, width=20, frames=120): 0.047143352151935734
// 0.5 (pow=4, f=0.000001, width=20, frames=160): 0.0014253338264054373
// 0.5 (pow=4, f=0.000001, width=20, frames=200): 0.0003719236701949888
// 0.5 (pow=4, f=0.000001, width=20, frames=300): 0.0000655709065994179
// 0.5 (pow=4, f=0.000001, width=20, frames=400): 0.000019428436023699366
// 0.5 (pow=4, f=0.000001, width=20, frames=600): 0.000006787101433521501
// 0.5 (pow=4, f=0.000001, width=20, frames=800): 0.000002352253311002263
// 0.5 (pow=4, f=0.000001, width=20, frames=1200): 0.0000008945009475225972
// 0.5 (pow=4, f=0.000001, width=20, frames=1600): 0.0000006298015155592009
// 0.5 (pow=4, f=0.000001, width=20, frames=2400): 0.0000003110877898243663
// pow=2 and pow=3 look nearly interchangeable in perf, with pow=4 slightly worse.
// Given higher stability for pow=3, it pushes me in that direction
// What about ordinary conductivity?
// Uniaxial conductors do nothing
// non-axial conductors (measured over full volume -- not just sim volume)
// 0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=20): 1.0150542017357447
// 0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=40): 1.0144968836020802
// 0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=80): 1.001249693707569
// 0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=120): 0.9703627391410313
// 0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=240): 0.9839347439326915
// 0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=400): 0.9710971333858839
// 0,0.000000000001 (pow=2, f=0.000001, width=20, frames=20): 1.015054152058065
// 0,0.000000000001 (pow=2, f=0.000001, width=20, frames=40): 1.0139965206685178
// 0,0.000000000001 (pow=2, f=0.000001, width=20, frames=80): 0.8611234324272511
// 0,0.000000000001 (pow=2, f=0.000001, width=20, frames=120): 0.38724876605299674
// 0,0.000000000001 (pow=2, f=0.000001, width=20, frames=240): 0.010452374686360136
// 0,0.000000000001 (pow=2, f=0.000001, width=20, frames=400): 0.0006325928742141608
// 0,0.000000001 (pow=2, f=0.000001, width=20, frames=20): 1.0150333781336882
// 0,0.000000001 (pow=2, f=0.000001, width=20, frames=40): 0.9834360538048873
// 0,0.000000001 (pow=2, f=0.000001, width=20, frames=80): 0.5072413415421974
// 0,0.000000001 (pow=2, f=0.000001, width=20, frames=120): 0.10345998021237998
// 0,0.000000001 (pow=2, f=0.000001, width=20, frames=240): 0.007519161562708659
// 0,0.000000001 (pow=2, f=0.000001, width=20, frames=400): 0.0006070229440826888
// 0,0.000001 (pow=2, f=0.000001, width=20, frames=20): 1.0150056763216164
// 0,0.000001 (pow=2, f=0.000001, width=20, frames=40): 1.0123907067781637
// 0,0.000001 (pow=2, f=0.000001, width=20, frames=80): 1.0095377475794036
// 0,0.000001 (pow=2, f=0.000001, width=20, frames=120): 1.0027226600640793
// 0,0.000001 (pow=2, f=0.000001, width=20, frames=240): 1.002880617201309
// 0,0.000001 (pow=2, f=0.000001, width=20, frames=400): 0.9817776859704778
// 0,0.001 (pow=2, f=0.000001, width=20, frames=20): 1.0150056260987463
// 0,0.001 (pow=2, f=0.000001, width=20, frames=40): 1.0127202910529511
// 0,0.001 (pow=2, f=0.000001, width=20, frames=80): 1.0141889645709163
// 0,0.001 (pow=2, f=0.000001, width=20, frames=120): 1.012712629869813
// 0,0.001 (pow=2, f=0.000001, width=20, frames=240): 1.0225762750160987
// 0,0.001 (pow=2, f=0.000001, width=20, frames=400): 1.0158169138074233
// 0,1 (pow=2, f=0.000001, width=20, frames=20): 1.0150056260488272
// 0,1 (pow=2, f=0.000001, width=20, frames=40): 1.0127206223077903
// 0,1 (pow=2, f=0.000001, width=20, frames=80): 1.01419363999915
// 0,1 (pow=2, f=0.000001, width=20, frames=120): 1.0127226953239867
// 0,1 (pow=2, f=0.000001, width=20, frames=240): 1.0225963209694455
// 0,1 (pow=2, f=0.000001, width=20, frames=400): 1.0158520285873147
// 0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=20): 1.0150542017810857
// 0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=40): 1.014497284230248
// 0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=80): 1.0013349322815948
// 0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=120): 0.9707653838346726
// 0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=240): 0.98542720202683
// 0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=400): 0.9736668760490382
// 0,0.000000000001 (pow=3, f=0.000001, width=20, frames=20): 1.0150541972999592
// 0,0.000000000001 (pow=3, f=0.000001, width=20, frames=40): 1.0143834319294165
// 0,0.000000000001 (pow=3, f=0.000001, width=20, frames=80): 0.9087702844329181
// 0,0.000000000001 (pow=3, f=0.000001, width=20, frames=120): 0.4641021861485614
// 0,0.000000000001 (pow=3, f=0.000001, width=20, frames=240): 0.025542565246607578
// 0,0.000000000001 (pow=3, f=0.000001, width=20, frames=400): 0.0024678463432975324
// 0,0.000000001 (pow=3, f=0.000001, width=20, frames=20): 1.0150507306076173
// 0,0.000000001 (pow=3, f=0.000001, width=20, frames=40): 0.9973368290451762
// 0,0.000000001 (pow=3, f=0.000001, width=20, frames=80): 0.5359561255382069
// 0,0.000000001 (pow=3, f=0.000001, width=20, frames=120): 0.10107229161449789
// 0,0.000000001 (pow=3, f=0.000001, width=20, frames=240): 0.0018726822600725027
// 0,0.000000001 (pow=3, f=0.000001, width=20, frames=400): 0.0001395247365920271
// 0,0.000001 (pow=3, f=0.000001, width=20, frames=20): 1.015006719517991
// 0,0.000001 (pow=3, f=0.000001, width=20, frames=40): 1.0067208901320701
// 0,0.000001 (pow=3, f=0.000001, width=20, frames=80): 0.9295573543881088
// 0,0.000001 (pow=3, f=0.000001, width=20, frames=120): 0.8372562924822498
// 0,0.000001 (pow=3, f=0.000001, width=20, frames=240): 0.7109091146305759
// 0,0.000001 (pow=3, f=0.000001, width=20, frames=400): 0.5405933200528408
// 0,0.001 (pow=3, f=0.000001, width=20, frames=20): 1.015005627048278
// 0,0.001 (pow=3, f=0.000001, width=20, frames=40): 1.0127139915480314
// 0,0.001 (pow=3, f=0.000001, width=20, frames=80): 1.0141000517606538
// 0,0.001 (pow=3, f=0.000001, width=20, frames=120): 1.0125212238134818
// 0,0.001 (pow=3, f=0.000001, width=20, frames=240): 1.0221951620616994
// 0,0.001 (pow=3, f=0.000001, width=20, frames=400): 1.0151495300965783
// 0,1 (pow=3, f=0.000001, width=20, frames=20): 1.0150056260497768
// 0,1 (pow=3, f=0.000001, width=20, frames=40): 1.012720616007617
// 0,1 (pow=3, f=0.000001, width=20, frames=80): 1.0141935510766382
// 0,1 (pow=3, f=0.000001, width=20, frames=120): 1.0127225038875007
// 0,1 (pow=3, f=0.000001, width=20, frames=240): 1.0225959397081388
// 0,1 (pow=3, f=0.000001, width=20, frames=400): 1.0158513607157937
// It appears here that the cubic roll-off is best, and very low conductivities are required.
//
// Isotropic conductor, energy measured only within the sim area:
// The optimized case (0.5*EPS0/timestep * x^3) is almost IDENTICAL to the
// optimized stretched-coordinate version.
// 0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=120): 0.07433102240945513
// 0,0.000000000000001 (pow=2, f=0.000001, width=20, frames=400): 0.20431918328907037
// 0,0.000000000001 (pow=2, f=0.000001, width=20, frames=120): 0.04851013262210713
// 0,0.000000000001 (pow=2, f=0.000001, width=20, frames=400): 0.00018366232272528987
// 0,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=120): 0.047101055930619994
// 0,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=400): 0.000007399479477711689
// 0,0.000000000008854187812813 (pow=2, f=0.000001, width=20, frames=120): 0.047132676278627536
// 0,0.000000000008854187812813 (pow=2, f=0.000001, width=20, frames=400): 0.000008105989741898135
// 0,0.00000000001 (pow=2, f=0.000001, width=20, frames=120): 0.047145523858393434
// 0,0.00000000001 (pow=2, f=0.000001, width=20, frames=400): 0.000008327266269591764
// 0,0.0000000001 (pow=2, f=0.000001, width=20, frames=120): 0.05005311352870215
// 0,0.0000000001 (pow=2, f=0.000001, width=20, frames=400): 0.00003665711687010239
// 0,0.000000001 (pow=2, f=0.000001, width=20, frames=120): 0.0808038989604745
// 0,0.000000001 (pow=2, f=0.000001, width=20, frames=400): 0.0005636469832522345
// 0,0.00000001 (pow=2, f=0.000001, width=20, frames=120): 0.42090062694288544
// 0,0.00000001 (pow=2, f=0.000001, width=20, frames=400): 0.07558396270665715
// 0,0.000001 (pow=2, f=0.000001, width=20, frames=120): 0.9456301791158588
// 0,0.000001 (pow=2, f=0.000001, width=20, frames=400): 0.9456217562930543
// 0,0.001 (pow=2, f=0.000001, width=20, frames=120): 0.9549600788853474
// 0,0.001 (pow=2, f=0.000001, width=20, frames=400): 0.9782784298162882
// 0,1 (pow=2, f=0.000001, width=20, frames=120): 0.9549694776242036
// 0,1 (pow=2, f=0.000001, width=20, frames=400): 0.9783121245194134
// 0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=120): 0.07435108171392021
// 0,0.000000000000001 (pow=3, f=0.000001, width=20, frames=400): 0.20479144240774633
// 0,0.000000000001 (pow=3, f=0.000001, width=20, frames=120): 0.050005777221700895
// 0,0.000000000001 (pow=3, f=0.000001, width=20, frames=400): 0.0006548443359188922
// 0,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=120): 0.0471097820852083
// 0,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=400): 0.00000726683693079002
// 0,0.000000000008854187812813 (pow=3, f=0.000001, width=20, frames=120): 0.04711795151894901
// 0,0.000000000008854187812813 (pow=3, f=0.000001, width=20, frames=400): 0.000007527865420649209
// 0,0.00000000001 (pow=3, f=0.000001, width=20, frames=120): 0.0471214615521427
// 0,0.00000000001 (pow=3, f=0.000001, width=20, frames=400): 0.000007561968840324101
// 0,0.0000000001 (pow=3, f=0.000001, width=20, frames=120): 0.04775494018209187
// 0,0.0000000001 (pow=3, f=0.000001, width=20, frames=400): 0.00001655968038405813
// 0,0.000000001 (pow=3, f=0.000001, width=20, frames=120): 0.05548225210925576
// 0,0.000000001 (pow=3, f=0.000001, width=20, frames=400): 0.00011973516700797696
// 0,0.00000001 (pow=3, f=0.000001, width=20, frames=120): 0.09859866928724702
// 0,0.00000001 (pow=3, f=0.000001, width=20, frames=400): 0.0010788775694009376
// 0,0.000001 (pow=3, f=0.000001, width=20, frames=120): 0.7905518369602526
// 0,0.000001 (pow=3, f=0.000001, width=20, frames=400): 0.5217962235754597
// 0,0.001 (pow=3, f=0.000001, width=20, frames=120): 0.9547813505519078
// 0,0.001 (pow=3, f=0.000001, width=20, frames=400): 0.9776380394998598
// 0,1 (pow=3, f=0.000001, width=20, frames=120): 0.9549692988681137
// 0,1 (pow=3, f=0.000001, width=20, frames=400): 0.978311483657099
// With both PML and conductor boundary:
// 0.5,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=120): 0.04709454626708049
// 0.5,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=400): 0.000007626399231684735
// 0.5,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=1200): 0.0000008116632120088743
// 0.5,0.0000000000044270939064065 (pow=2, f=0.000001, width=20, frames=2400): 0.00000032497678753300923
// 0.5,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=120): 0.04709779630205149
// 0.5,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=400): 0.000007132448872463483
// 0.5,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=1200): 0.000000710451109532798
// 0.5,0.0000000000044270939064065 (pow=3, f=0.000001, width=20, frames=2400): 0.0000002515070714149805
// This is basically no change from JUST PML or JUST conductors
// Maybe I should be trying to vary the width: maybe PML works more effectively for narrower
// boundaries than do conductors?
}
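The sweep above converges on a cubic onset with coefficient 0.5, i.e. boundary conductivity ~ 0.5 * EPS0 / timestep * x^3 for normalized depth x into the absorber; dividing by the timestep is what makes the absorption independent of feature size (per the first conclusion). A hedged, standalone sketch of that profile (illustrative code, not part of the crate):

```rust
// Absorber profile the parameter sweep above settles on:
// sigma(x) = coeff * EPS0 / timestep * x^pow, with x in [0, 1] the normalized
// depth into the boundary, pow = 3 (cubic onset), coeff = 0.5.
const EPS0: f64 = 8.8541878128e-12; // vacuum permittivity, F/m

fn boundary_conductivity(depth: f64, timestep: f64, coeff: f64, pow: f64) -> f64 {
    coeff * EPS0 / timestep * depth.powf(pow)
}

fn main() {
    let dt = 1e-15;
    // ramps smoothly from zero at the interior edge...
    assert_eq!(boundary_conductivity(0.0, dt, 0.5, 3.0), 0.0);
    // ...up to 0.5 * EPS0 / dt at the outermost cell (matches the
    // 0.00000000000442709... cond_coeff values in the logs above, scaled by 1/dt)
    println!("outer-cell sigma: {:.4e} S/m", boundary_conductivity(1.0, dt, 0.5, 3.0));
}
```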


@@ -0,0 +1,188 @@
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
/// this is just a big dumb bag of perf-related metrics,
/// gathered with the intention of identifying areas for optimization
pub struct Diagnostics {
frames_completed: u64,
/// all known time spent in driver code, as measured from the toplevel
time_in_driver: Duration,
/// time during which driver had passed control to stimulus
time_sim_step: Duration,
time_prepping_stim: Duration,
time_on_stimuli: Duration,
/// time during which driver was preparing to render (e.g. cloning state)
time_prepping_render: Duration,
time_rendering: Duration,
/// time during which driver was waiting for an async stimulus job
time_blocked_on_stim: Duration,
/// time during which driver was waiting for an async render job
time_blocked_on_render: Duration,
/// time during which CPU was waiting for GPU data
time_reading_device: Duration,
/// time during which CPU was transferring data to GPU
time_writing_device: Duration,
/// time during which the GPU was actively computing steps
time_stepping_device: Duration,
start_time: Instant,
}
#[derive(Clone, Default)]
pub struct SyncDiagnostics(Arc<Mutex<Diagnostics>>);
impl Default for Diagnostics {
fn default() -> Self {
Self::new()
}
}
impl Diagnostics {
pub fn new() -> Self {
Self {
frames_completed: 0,
time_in_driver: Default::default(),
time_sim_step: Default::default(),
time_prepping_stim: Default::default(),
time_on_stimuli: Default::default(),
time_prepping_render: Default::default(),
time_rendering: Default::default(),
time_blocked_on_stim: Default::default(),
time_blocked_on_render: Default::default(),
time_reading_device: Default::default(),
time_writing_device: Default::default(),
time_stepping_device: Default::default(),
start_time: Instant::now(),
}
}
pub fn format(&self) -> String {
let overall_time = self.start_time.elapsed().as_secs_f64();
let driver_time = self.time_in_driver.as_secs_f64();
let fps = (self.frames_completed as f64) / overall_time;
let fps_line = format!("fps: {:6.2}", fps);
let step_time = self.time_sim_step.as_secs_f64();
let render_prep_time = self.time_prepping_render.as_secs_f64();
let stim_block_time = self.time_blocked_on_stim.as_secs_f64();
let render_block_time = self.time_blocked_on_render.as_secs_f64();
let stim_prep_time = self.time_prepping_stim.as_secs_f64();
let other_driver_time = driver_time - (
step_time + stim_block_time + stim_prep_time + render_block_time + render_prep_time
);
let other_time = overall_time - driver_time;
let toplevel_line = format!("toplevel\tstep: {:.1}s, stim_blocked: {:.1}s, render_blocked: {:.1}s, stim_prep: {:.1}s, render_prep: {:.1}s, driver_other: {:.1}s, unknown: {:.1}s",
step_time,
stim_block_time,
render_block_time,
stim_prep_time,
render_prep_time,
other_driver_time,
other_time,
);
let device_step_time = self.time_stepping_device.as_secs_f64();
let device_write_time = self.time_writing_device.as_secs_f64();
let device_read_time = self.time_reading_device.as_secs_f64();
let device_line = format!("> gpu\tstep: {:.1}s, write: {:.1}s, read: {:.1}s",
device_step_time,
device_write_time,
device_read_time,
);
let stim_bg_time = self.time_on_stimuli.as_secs_f64();
let render_bg_time = self.time_rendering.as_secs_f64();
let bg_line = format!("> async\tstim: {:.1}s, render: {:.1}s",
stim_bg_time,
render_bg_time,
);
format!("{}\n {}\n {}\n {}", fps_line, toplevel_line, device_line, bg_line)
}
}
impl SyncDiagnostics {
pub fn new() -> Self {
Self(Arc::new(Mutex::new(Diagnostics::new())))
}
pub fn format(&self) -> String {
self.0.lock().unwrap().format()
}
/// measure the duration of some arbitrary chunk of code.
/// used internally.
pub fn measure<R, F: FnOnce() -> R>(f: F) -> (Duration, R) {
let start = Instant::now();
let r = f();
(start.elapsed(), r)
}
pub fn instrument_driver<R, F: FnOnce() -> R>(&self, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
self.0.lock().unwrap().time_in_driver += elapsed;
ret
}
/// record the duration of the sim step operation.
pub fn instrument_step<R, F: FnOnce() -> R>(&self, frames: u64, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
let mut me = self.0.lock().unwrap();
me.time_sim_step += elapsed;
me.frames_completed += frames as u64;
ret
}
/// record the duration spent preparing for render (i.e. cloning stuff and moving it into a
/// render pool).
pub fn instrument_render_prep<R, F: FnOnce() -> R>(&self, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
self.0.lock().unwrap().time_prepping_render += elapsed;
ret
}
/// record the duration actually spent doing CPU render work
pub fn instrument_render_cpu_side<R, F: FnOnce() -> R>(&self, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
self.0.lock().unwrap().time_rendering += elapsed;
ret
}
pub fn instrument_stimuli_prep<R, F: FnOnce() -> R>(&self, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
self.0.lock().unwrap().time_prepping_stim += elapsed;
ret
}
/// record the duration spent blocking the simulation because the stimulus queue is full.
pub fn instrument_stimuli_blocked<R, F: FnOnce() -> R>(&self, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
self.0.lock().unwrap().time_blocked_on_stim += elapsed;
ret
}
/// record the duration spent blocking the simulation because the render queue is full.
pub fn instrument_render_blocked<R, F: FnOnce() -> R>(&self, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
self.0.lock().unwrap().time_blocked_on_render += elapsed;
ret
}
pub fn instrument_stimuli<R, F: FnOnce() -> R>(&self, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
self.0.lock().unwrap().time_on_stimuli += elapsed;
ret
}
pub fn record_step_device(&self, t: Duration) {
self.0.lock().unwrap().time_stepping_device += t;
}
pub fn instrument_read_device<R, F: FnOnce() -> R>(&self, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
self.0.lock().unwrap().time_reading_device += elapsed;
ret
}
pub fn instrument_write_device<R, F: FnOnce() -> R>(&self, f: F) -> R {
let (elapsed, ret) = Self::measure(f);
self.0.lock().unwrap().time_writing_device += elapsed;
ret
}
}

@@ -1,107 +1,149 @@
-use crate::geom::{Coord, Index, Meters, Region, Vec3};
-use crate::mat::{self, Pml};
+use crate::diagnostics::SyncDiagnostics;
+use crate::geom::{Coord, Index, Region};
+use crate::mat;
 use crate::meas::{self, AbstractMeasurement};
-use crate::real::{self, Real};
+use crate::real::Real;
 use crate::render::{self, MultiRenderer, Renderer};
-use crate::sim::{GenericSim, MaterialSim, SampleableSim, SimState};
+use crate::sim::AbstractSim;
 use crate::sim::units::{Frame, Time};
-use crate::sim::spirv::{self, SpirvSim};
-use crate::stim::AbstractStimulus;
+use crate::stim::{
+    DynStimuli,
+    Fields,
+    FieldMags,
+    ModulatedVectorField,
+    RenderedStimulus,
+    Stimulus,
+    StimuliVec,
+    TimeVarying,
+    VectorField,
+};
+use crate::worker::JobPool;
+use coremem_cross::compound::list;
+use coremem_cross::dim::DimSlice;
+use coremem_cross::step::SimMeta;
 use log::{info, trace};
 use serde::{Deserialize, Serialize};
+use std::cell::Cell;
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex};
-use std::sync::mpsc::{sync_channel, SyncSender, Receiver};
-use std::time::{Duration, Instant};
-use threadpool::ThreadPool;
+use std::time::Instant;
-pub struct Driver<S=SimState> {
-    pub state: S,
+pub struct Driver<R, S, Stim=DriverStimulusDynVec<R>> {
+    state: S,
     renderer: Arc<MultiRenderer<S>>,
-    // TODO: use Rayon's thread pool?
-    render_pool: ThreadPool,
-    render_channel: (SyncSender<()>, Receiver<()>),
-    time_spent_stepping: Duration,
-    time_spent_on_stimuli: Duration,
-    time_spent_prepping_render: Duration,
-    time_spent_blocked_on_render: Duration,
-    time_spent_rendering: Arc<Mutex<Duration>>,
-    measurements: Vec<Box<dyn AbstractMeasurement>>,
-    stimuli: StimuliAdapter,
-    start_time: Instant,
-    last_diag_time: Instant,
+    render_pool: JobPool<S, ()>,
+    measurements: Vec<Arc<dyn AbstractMeasurement<S>>>,
+    stimuli: StimAccess<R, Stim>,
     /// simulation end time
     sim_end_time: Option<Frame>,
+    diag: SyncDiagnostics,
+    last_diag_time: Instant,
 }
-pub type CpuDriver<R=real::R32, M=mat::GenericMaterial<R>> = Driver<SimState<R, M>>;
-pub type SpirvDriver<M=spirv::FullyGenericMaterial> = Driver<SpirvSim<M>>;
-
-impl<R: Real, M: Default> Driver<SimState<R, M>> {
-    pub fn new<C: Coord>(size: C, feature_size: f32) -> Self {
-        Self::new_with_state(SimState::new(size.to_index(feature_size), feature_size))
-    }
-}
-
-impl<M: spirv::IntoFfi> SpirvDriver<M>
-where M::Ffi: Default + 'static
-{
-    pub fn new_spirv<C: Coord>(size: C, feature_size: f32) -> Self {
-        Self::new_with_state(SpirvSim::new(size.to_index(feature_size), feature_size))
-    }
-}
-
-impl<S> Driver<S> {
-    pub fn new_with_state(state: S) -> Self {
+impl<S: AbstractSim, Stim> Driver<S::Real, S, Stim> {
+    pub fn new_with_stim(mut state: S, stimuli: Stim) -> Self {
+        let diag = SyncDiagnostics::new();
+        state.use_diagnostics(diag.clone());
         Self {
             state,
             renderer: Arc::new(MultiRenderer::new()),
-            render_pool: ThreadPool::new(3),
-            render_channel: sync_channel(0),
-            time_spent_stepping: Default::default(),
-            time_spent_on_stimuli: Default::default(),
-            time_spent_prepping_render: Default::default(),
-            time_spent_blocked_on_render: Default::default(),
-            time_spent_rendering: Default::default(),
+            render_pool: JobPool::new(1),
             measurements: vec![
-                Box::new(meas::Time),
-                Box::new(meas::Meta),
-                Box::new(meas::Energy::world()),
-                Box::new(meas::Power::world()),
+                Arc::new(meas::Time),
+                Arc::new(meas::Meta),
+                Arc::new(meas::Energy::world()),
+                Arc::new(meas::Power::world()),
             ],
-            stimuli: StimuliAdapter::new(),
-            start_time: Instant::now(),
-            last_diag_time: Instant::now(),
+            stimuli: StimAccess::new(diag.clone(), stimuli),
             sim_end_time: None,
+            diag,
+            last_diag_time: Instant::now(),
         }
     }
-}
 
-impl<S> Driver<S> {
-    pub fn add_stimulus<Stim: AbstractStimulus + 'static>(&mut self, s: Stim) {
-        self.stimuli.push(Box::new(s))
+    pub fn add_measurement<Meas: AbstractMeasurement<S> + 'static>(&mut self, m: Meas) {
+        self.measurements.push(Arc::new(m));
     }
-    pub fn add_measurement<Meas: AbstractMeasurement + 'static>(&mut self, m: Meas) {
-        self.measurements.push(Box::new(m));
-    }
 
-    pub fn set_steps_per_stim(&mut self, steps_per_stim: u64) {
-        self.stimuli.frame_interval = steps_per_stim;
+    pub fn add_stimulus<SNew>(&mut self, s: SNew)
+    where Stim: Pushable<SNew>
+    {
+        self.stimuli.push(self.state.meta(), s)
     }
 }
impl<S: MaterialSim> Driver<S> { impl<S: AbstractSim> Driver<S::Real, S, DriverStimulusDynVec<S::Real>> {
pub fn new(state: S) -> Self {
Self::new_with_stim(state, DriverStimulusDynVec::default())
}
}
impl<S: AbstractSim> Driver<S::Real, S, list::Empty> {
pub fn new_list_stim(state: S) -> Self {
Self::new_with_stim(state, list::Empty::default())
}
}
impl<S: AbstractSim, Stim> Driver<S::Real, S, Stim> {
/// add a stimulus onto a list of non-monomorphized stimuli.
/// this necessarily must return a new Self.
/// (well, with enough tuning we could actually Box just the first reference...
pub fn with_add_stimulus<E>(self, s: E) -> Driver<S::Real, S, list::Appended<Stim, E>>
where Stim: list::Appendable<E>
{
Driver {
state: self.state,
renderer: self.renderer,
render_pool: self.render_pool,
measurements: self.measurements,
stimuli: StimAccess::new(self.diag.clone(), self.stimuli.into_inner().append(s)),
sim_end_time: self.sim_end_time,
diag: self.diag,
last_diag_time: self.last_diag_time,
}
}
pub fn with_stimulus<NewStim>(self, stimuli: NewStim) -> Driver<S::Real, S, NewStim> {
Driver {
state: self.state,
renderer: self.renderer,
render_pool: self.render_pool,
measurements: self.measurements,
stimuli: StimAccess::new(self.diag.clone(), stimuli),
sim_end_time: self.sim_end_time,
diag: self.diag,
last_diag_time: self.last_diag_time,
}
}
pub fn with_concrete_stimulus<T>(self) -> Driver<S::Real, S, DriverStimulusVec<T>> {
self.with_stimulus(DriverStimulusVec::<T>::default())
}
pub fn with_modulated_stimulus<T>(self) -> Driver<S::Real, S, DriverStimulusModulated<S::Real, T>> {
self.with_stimulus(DriverStimulusModulated::<S::Real, T>::default())
}
}
impl<R, S, Stim> Driver<R, S, Stim> {
/// when we step the simulation N times, we do so with a constant stimulus over those N frames.
/// lower-resolution quantization of stimuli lets us batch more step calls (critical to perf)
/// but at the cost of precision.
pub fn set_steps_per_stimulus(&mut self, steps: u64) {
self.stimuli.steps_per_stimulus = steps;
}
}
impl<S: AbstractSim, Stim> Driver<S::Real, S, Stim> {
pub fn fill_region<Reg: Region, M: Into<S::Material> + Clone>(&mut self, region: &Reg, mat: M) { pub fn fill_region<Reg: Region, M: Into<S::Material> + Clone>(&mut self, region: &Reg, mat: M) {
self.state.fill_region(region, mat); self.state.fill_region(region, mat);
} }
pub fn test_region_filled<Reg: Region, M: Into<S::Material> + Clone>(&mut self, region: &Reg, mat: M) -> bool { pub fn test_region_filled<Reg: Region, M>(&mut self, region: &Reg, mat: M) -> bool
where
M: Into<S::Material> + Clone,
S::Material: PartialEq
{
self.state.test_region_filled(region, mat) self.state.test_region_filled(region, mat)
} }
}
impl<S: SampleableSim> Driver<S> {
pub fn size(&self) -> Index { pub fn size(&self) -> Index {
self.state.size() self.state.size()
} }
@@ -111,13 +153,23 @@ impl<S: SampleableSim> Driver<S> {
pub fn time(&self) -> f32 { pub fn time(&self) -> f32 {
self.state.time() self.state.time()
} }
pub fn add_classical_boundary<C: Coord>(&mut self, thickness: C)
where S::Material: From<mat::IsomorphicConductor<S::Real>>
{
let timestep = self.state.timestep();
self.state.fill_boundary_using(thickness, |boundary_ness| {
let b = boundary_ness.elem_pow(3.0);
let cond = b * (0.5 / timestep);
let iso_cond = cond.x() + cond.y() + cond.z();
let iso_conductor = mat::IsomorphicConductor::new(iso_cond.cast());
iso_conductor
});
}
} }
impl<S: SampleableSim + Send + Sync + 'static> Driver<S> { impl<S: AbstractSim + 'static, Stim> Driver<S::Real, S, Stim> {
pub fn dyn_state(&mut self) -> &mut dyn SampleableSim {
&mut self.state
}
fn add_renderer<Rend: Renderer<S> + 'static>( fn add_renderer<Rend: Renderer<S> + 'static>(
&mut self, renderer: Rend, name: &str, step_frequency: u64, frame_limit: Option<u64> &mut self, renderer: Rend, name: &str, step_frequency: u64, frame_limit: Option<u64>
) { ) {
@@ -140,114 +192,118 @@ impl<S: SampleableSim + Send + Sync + 'static> Driver<S> {
} }
} }
impl<S: SampleableSim + Send + Sync + Serialize + 'static> Driver<S> { impl<S: AbstractSim + Serialize + 'static, Stim> Driver<S::Real, S, Stim> {
pub fn add_serializer_renderer(&mut self, out_base: &str, step_frequency: u64, frame_limit: Option<u64>) { pub fn add_serializer_renderer(&mut self, out_base: &str, step_frequency: u64, frame_limit: Option<u64>) {
let fmt_str = format!("{out_base}{{step_no}}.bc", out_base=out_base); let fmt_str = format!("{out_base}{{step_no}}.bc", out_base=out_base);
self.add_renderer(render::SerializerRenderer::new_static(&*fmt_str), &*fmt_str, step_frequency, frame_limit); self.add_renderer(render::SerializerRenderer::new_generic(&*fmt_str), &*fmt_str, step_frequency, frame_limit);
} }
} }
impl<S: SampleableSim + Send + Sync + Serialize + for<'a> Deserialize<'a> + 'static> Driver<S> { impl<S, Stim> Driver<S::Real, S, Stim>
where
S: AbstractSim + Send + Sync + Serialize + for<'a> Deserialize<'a> + 'static
{
/// instruct the driver to periodically save the simulation state to the provided path. /// instruct the driver to periodically save the simulation state to the provided path.
/// also attempts to load an existing state file, returning `true` on success. /// also attempts to load an existing state file, returning `true` on success.
pub fn add_state_file(&mut self, state_file: &str, snapshot_frequency: u64) -> bool { pub fn add_state_file(&mut self, state_file: &str, snapshot_frequency: u64) -> bool {
let ser = render::SerializerRenderer::new(state_file); let ser = render::SerializerRenderer::new(state_file);
let loaded = match ser.try_load() { let loaded = ser.try_load().map(|s| {
Some(state) => { self.state = s.state;
self.state = state.state; self.state.use_diagnostics(self.diag.clone());
true }).is_some();
},
None => false,
};
self.add_renderer(ser, state_file, snapshot_frequency, None); self.add_renderer(ser, state_file, snapshot_frequency, None);
loaded loaded
} }
} }
impl<S: GenericSim + Clone + Default + Send + Sync + 'static> Driver<S> { impl<S, Stim> Driver<S::Real, S, Stim>
where
S: AbstractSim + Clone + Default + Send + 'static,
Stim: DriverStimulus<S::Real> + Send + 'static,
{
fn render(&mut self) { fn render(&mut self) {
let prep_start = Instant::now(); let their_state = self.diag.instrument_render_prep(|| {
let their_state = self.state.clone(); if self.render_pool.num_workers() != 3 {
let their_measurements = self.measurements.clone(); let diag = self.diag.clone();
let renderer = self.renderer.clone(); // TODO: these measurements will come to differ from the ones in the Driver,
let time_spent_rendering = self.time_spent_rendering.clone(); // if the user calls `add_measurement`!
let sender = self.render_channel.0.clone(); let measurements = self.measurements.clone();
self.render_pool.execute(move || { let renderer = self.renderer.clone();
// unblock the main thread (this limits the number of renders in-flight at any time self.render_pool.spawn_workers(3, move |state| {
sender.send(()).unwrap(); // unblock the main thread (this limits the number of renders in-flight at any time
trace!("render begin"); trace!("render begin");
let start_time = Instant::now(); diag.instrument_render_cpu_side(|| {
renderer.render(&their_state, &*their_measurements, Default::default()); let meas: Vec<&dyn AbstractMeasurement<S>> = measurements.iter().map(|m| &**m).collect();
*time_spent_rendering.lock().unwrap() += start_time.elapsed(); renderer.render(&state, &*meas, Default::default());
trace!("render end"); });
trace!("render end");
});
}
self.state.clone()
});
// TODO: this instrumentation is not 100% accurate.
// - 'prep' and 'blocked' have effectively been folded together.
// - either delete 'prep', or change this block to use a `try_send` (prep) followed by a
// `send` (blocking)
self.diag.instrument_render_blocked(|| {
self.render_pool.tend();
self.render_pool.send(their_state);
}); });
self.time_spent_prepping_render += prep_start.elapsed();
let block_start = Instant::now();
self.render_channel.1.recv().unwrap();
self.time_spent_blocked_on_render += block_start.elapsed();
} }
/// Return the number of steps actually stepped /// Return the number of steps actually stepped
fn step_at_most(&mut self, at_most: u32) -> u32 { fn step_at_most(&mut self, at_most: u32) -> u32 {
assert!(at_most != 0); let diag = self.diag.clone();
let start_step = self.state.step_no(); diag.instrument_driver(move || {
if self.stimuli.should_apply(start_step) { assert!(at_most != 0);
self.stimuli.real_time = self.state.time(); let start_step = self.state.step_no();
self.stimuli.time_step = self.state.timestep();
trace!("updating stimuli");
}
if self.renderer.any_work_for_frame(start_step) { if self.renderer.any_work_for_frame(start_step) {
self.render(); self.render();
} }
let mut can_step = 1; // maybe the renderer or stimulus needs servicing before the max frame the user asked for.
while can_step < at_most && !self.renderer.any_work_for_frame(start_step + can_step as u64) { // step less than `at_most`, in that case.
can_step += 1; let next_frame_for_user = start_step + at_most as u64;
} let next_frame_to_render = self.renderer.next_frame_for_work(start_step);
trace!("step begin"); let next_frame_for_stim = self.stimuli.next_frame_for_work(start_step);
let start_time = Instant::now(); let step_to = [Some(next_frame_for_user), next_frame_to_render, Some(next_frame_for_stim)]
self.state.step_multiple(can_step, &self.stimuli); .into_iter()
self.time_spent_stepping += start_time.elapsed(); .flatten()
trace!("step end"); .min()
if self.last_diag_time.elapsed().as_secs_f64() >= 5.0 { .unwrap();
self.last_diag_time = Instant::now(); let steps_this_time = (step_to - start_step).try_into().unwrap();
let step = self.state.step_no();
let step_time = self.time_spent_stepping.as_secs_f64(); let meta = self.state.meta();
let stim_time = self.time_spent_on_stimuli.as_secs_f64(); let stim = self.stimuli.get_for(meta, start_step);
let render_time = self.time_spent_rendering.lock().unwrap().as_secs_f64(); // prefetch the next stimulus, in the background.
let render_prep_time = self.time_spent_prepping_render.as_secs_f64(); self.diag.instrument_stimuli_prep(|| {
let block_time = self.time_spent_blocked_on_render.as_secs_f64(); self.stimuli.start_job(meta, step_to);
let overall_time = self.start_time.elapsed().as_secs_f64(); });
let fps = (self.state.step_no() as f64) / overall_time;
let sim_time = self.state.time() as f64; trace!("step begin");
let percent_complete = match self.sim_end_time { self.diag.instrument_step(steps_this_time as u64, || {
Some(t) => format!("[{:.1}%] ", 100.0 * self.state.time() / *t.to_seconds(self.timestep())), self.state.step_multiple(steps_this_time, &stim);
None => "".to_owned(), });
}; trace!("step end");
info!(
"{}t={:.2e} frame {:06} fps: {:6.2} (sim: {:.1}s, stim: {:.1}s, [render: {:.1}s], blocked: {:.1}s, render_prep: {:.1}s, other: {:.1}s)", if self.last_diag_time.elapsed().as_secs_f64() >= 5.0 {
percent_complete, self.last_diag_time = Instant::now();
sim_time, let step = self.state.step_no();
step, let diagstr = self.diag.format();
fps, let sim_time = self.state.time() as f64;
step_time, let percent_complete = self.sim_end_time.map(|t| {
stim_time, format!("[{:.1}%] ", 100.0 * self.state.time() / *t.to_seconds(self.timestep()))
render_time, }).unwrap_or_default();
block_time, info!(
render_prep_time, "{}t={:.2e} frame {:06} {}",
overall_time - step_time - stim_time - block_time - render_prep_time, percent_complete, sim_time, step, diagstr
); );
} }
can_step as u32 steps_this_time
} })
pub fn step_multiple(&mut self, num_steps: u32) {
let mut steps_remaining = num_steps;
while steps_remaining != 0 {
steps_remaining -= self.step_at_most(steps_remaining);
}
} }
pub fn step(&mut self) { pub fn step(&mut self) {
self.step_multiple(1); self.step_at_most(1);
} }
/// Returns the number of timesteps needed to reach the end time /// Returns the number of timesteps needed to reach the end time
@@ -261,101 +317,237 @@ impl<S: GenericSim + Clone + Default + Send + Sync + 'static> Driver<S> {
let sim_end_time = sim_end_time.to_frame(self.state.timestep()); let sim_end_time = sim_end_time.to_frame(self.state.timestep());
self.sim_end_time = Some(sim_end_time); self.sim_end_time = Some(sim_end_time);
let mut stepped = false; let mut stepped = false;
while self.dyn_state().step_no() < *sim_end_time { while self.state.step_no() < *sim_end_time {
self.step_multiple(100); let steps_left = *sim_end_time - self.state.step_no();
// sanity limit: don't try to step too much at once else we may lock up the GPU/etc.
self.step_at_most(steps_left.min(1000) as u32);
stepped = true; stepped = true;
} }
if stepped { if stepped {
// render the final frame -- unless we already *have* // render the final frame -- unless we already *have*
self.render(); self.render();
} }
self.render_pool.join(); self.render_pool.join_workers();
self.sim_end_time = None; self.sim_end_time = None;
} }
} }
impl<S: MaterialSim> Driver<S> { // this is effectively `Cow`, but without the `ToOwned` (Clone) requirement
pub fn add_pml_boundary<C: Coord, R: Real>(&mut self, thickness: C) pub enum ValueOrRef<'a, T> {
where S::Material: From<Pml<R>> Value(T),
{ Ref(&'a T),
let timestep = self.state.timestep();
self.state.fill_boundary_using(thickness, |boundary_ness| {
let b = boundary_ness.elem_pow(3.0);
let conductivity = b * (0.5 / timestep);
Pml::new(conductivity)
});
}
pub fn add_classical_boundary<C: Coord>(&mut self, thickness: C)
where S::Material: From<mat::IsomorphicConductor<f32>>
{
self.add_classical_boundary_explicit::<f32, _>(thickness)
}
/// the CPU code is parameterized over `Real`: you'll need to use this interface to get access
/// to that, if using a CPU driver. otherwise, use `add_classical_boundary`
pub fn add_classical_boundary_explicit<R: Real, C: Coord>(&mut self, thickness: C)
where S::Material: From<mat::IsomorphicConductor<R>>
{
let timestep = self.state.timestep();
self.state.fill_boundary_using(thickness, |boundary_ness| {
let b = boundary_ness.elem_pow(3.0);
let cond = b * (0.5 / timestep);
let iso_cond = cond.x() + cond.y() + cond.z();
let iso_conductor = mat::IsomorphicConductor::new(iso_cond.cast());
iso_conductor
});
}
} }
impl<'a, T> AsRef<T> for ValueOrRef<'a, T> {
/// Adapts the stimuli to be applied only every so often, to improve perf fn as_ref(&self) -> &T {
struct StimuliAdapter { match self {
stim: Vec<Box<dyn AbstractStimulus>>, ValueOrRef::Value(x) => &x,
/// How many frames to go between applications of the stimulus. ValueOrRef::Ref(x) => x,
frame_interval: u64,
real_time: f32,
time_step: f32,
}
impl AbstractStimulus for StimuliAdapter {
fn at(&self, t_sec: f32, pos: Meters) -> (Vec3<f32>, Vec3<f32>) {
self.stim.at(t_sec, pos)
// TODO: remove this stuff (here only for testing)
/*
if true {
// interpolation unaware (i.e. let the Sim backend do it)
} else if false {
// delta-fn "interpolation"
self.stim.at(t_sec, pos) * (self.frame_interval as f32)
} else if false {
// step-fn "interpolation"
self.stim.at(self.real_time, pos)
} else {
// linear interpolation
let interp_width = self.frame_interval as f32 * self.time_step;
let prev = self.stim.at(self.real_time, pos);
let next = self.stim.at(self.real_time + interp_width, pos);
let interp = (t_sec - self.real_time) / interp_width;
prev * (1.0 - interp) + next * interp
} }
*/
} }
} }
impl StimuliAdapter { /// gives an opportunity to optimize a Stimulus for a specific setting
fn new() -> Self { /// before passing it off to the simulation.
pub trait DriverStimulus<R: Real> {
type Optimized: Stimulus<R>;
fn optimized_for<'a>(
&'a self, meta: SimMeta<f32>, _step: u64
) -> ValueOrRef<'a, Self::Optimized>;
}
pub trait Pushable<T> {
fn push(&mut self, meta: SimMeta<f32>, t: T);
}
pub struct DriverStimulusVec<T>(StimuliVec<T>);
impl<T> Default for DriverStimulusVec<T> {
fn default() -> Self {
Self(Default::default())
}
}
impl<R: Real, T: Stimulus<R>> DriverStimulus<R> for DriverStimulusVec<T> {
type Optimized = StimuliVec<T>;
fn optimized_for<'a>(
&'a self, _meta: SimMeta<f32>, _step: u64
) -> ValueOrRef<'a, Self::Optimized> {
ValueOrRef::Ref(&self.0)
}
}
impl<T> Pushable<T> for DriverStimulusVec<T> {
fn push(&mut self, _meta: SimMeta<f32>, t: T) {
self.0.push(t)
}
}
#[derive(Default)]
pub struct DriverStimulusDynVec<R>(DynStimuli<R>);
impl<R: Real> DriverStimulus<R> for DriverStimulusDynVec<R> {
type Optimized = DynStimuli<R>;
fn optimized_for<'a>(
&'a self, _meta: SimMeta<f32>, _step: u64
) -> ValueOrRef<'a, Self::Optimized> {
ValueOrRef::Ref(&self.0)
}
}
impl<R: Real, T: Stimulus<R> + Send + 'static> Pushable<T> for DriverStimulusDynVec<R> {
fn push(&mut self, _meta: SimMeta<f32>, t: T) {
self.0.push(Box::new(t))
}
}
/// optimized stimulus which will evaluate the vector fields _only once_
pub struct DriverStimulusModulated<R, T>(StimuliVec<ModulatedStaticField<R, T>>);
impl<R: Real, T> Default for DriverStimulusModulated<R, T> {
fn default() -> Self {
Self(Default::default())
}
}
impl<R: Real, V: VectorField<R>, T> Pushable<ModulatedVectorField<V, T>> for DriverStimulusModulated<R, T> {
fn push(&mut self, meta: SimMeta<f32>, stim: ModulatedVectorField<V, T>) {
let (vfield, timef) = stim.into_inner();
let dim = meta.dim();
let mut storage = Vec::new();
storage.resize_with(dim.product_sum_usize(), Fields::default);
let mut view = DimSlice::new(dim, storage);
let mut_view: DimSlice<&mut [_]> = view.as_mut();
for (loc, value) in mut_view.enumerated() {
*value = vfield.at(meta.feature_size().cast(), loc.into());
}
self.0.push(ModulatedStaticField::new(view, timef))
}
}
impl<R: Real, T: TimeVarying<R>> DriverStimulus<R> for DriverStimulusModulated<R, T> {
type Optimized = StimuliVec<ModulatedStaticField<R, FieldMags<R>>>;
fn optimized_for<'a>(
&'a self, meta: SimMeta<f32>, step: u64,
) -> ValueOrRef<'a, Self::Optimized> {
let t_sec = meta.time_step().cast::<R>() * R::from_primitive(step);
let opt = self.0.iter().map(|modulated| ModulatedVectorField::new(
// TODO: remove this costly clone!
(*modulated.fields()).clone(),
modulated.modulation().at(t_sec),
)).collect();
ValueOrRef::Value(StimuliVec::from_vec(opt))
}
}
/// a Stimulus where the field has been pre-calculated
pub type ModulatedStaticField<R, T> = ModulatedVectorField<DimSlice<Vec<Fields<R>>>, T>;
/// wraps a Stimulus to help provide async functionality on top of it.
/// the caller can request evaluation at a specific time, and either block on that or
/// come back and re-request that time later, expecting that it's been evaluated in the background.
struct StimAccess<R, T> {
stim: Arc<Mutex<T>>,
steps_per_stimulus: u64,
diag: SyncDiagnostics,
/// is the background thread doing work (or, has it completed work and placed it on the return
/// queue)?
/// A.K.A. "can i safely do a blocking recv on response_channel".
outstanding: Cell<bool>,
worker: JobPool<(SimMeta<f32>, u64), (SimMeta<f32>, u64, RenderedStimulus<R>)>,
}
impl<R, T> StimAccess<R, T> {
fn new(diag: SyncDiagnostics, stim: T) -> Self {
Self { Self {
stim: Default::default(), stim: Arc::new(Mutex::new(stim)),
frame_interval: 1, steps_per_stimulus: 1,
real_time: 0.0, diag,
time_step: 0.0, outstanding: Cell::new(false),
worker: JobPool::new(1),
} }
} }
fn should_apply(&self, frame: u64) -> bool { fn into_inner(self) -> T {
(frame % self.frame_interval == 0) && self.stim.len() != 0 let _ = self.maybe_wait_for_job(Default::default(), 0);
// with the worker joined, there should be no outstanding handles on the arc.
Arc::try_unwrap(self.stim).ok().unwrap().into_inner().unwrap()
} }
fn push(&mut self, s: Box<dyn AbstractStimulus>) { fn next_frame_for_work(&self, after: u64) -> u64 {
self.stim.push(s) let f = after + self.steps_per_stimulus;
f - f % self.steps_per_stimulus
}
/// used internally.
/// waits for an outstanding job (if any).
/// if the response matches the request, return the response,
/// else discard the response.
fn maybe_wait_for_job(&self, meta: SimMeta<f32>, step: u64) -> Option<RenderedStimulus<R>> {
if !self.outstanding.get() {
return None;
}
// block until job is complete and receive the result
let completed = self.diag.instrument_stimuli_blocked(|| {
self.worker.recv()
});
let (job_meta, job_step, rendered) = completed;
self.outstanding.set(false);
Some(rendered)
.filter(|_| (job_meta, job_step) == (meta, step))
}
}
impl<R: Real, T: DriverStimulus<R> + Send + 'static> StimAccess<R, T> {
fn get_for(&mut self, meta: SimMeta<f32>, step: u64) -> RenderedStimulus<R> {
// either claim the outstanding job (if it exists and matches)...
self.maybe_wait_for_job(meta, step).unwrap_or_else(|| {
// or start a job and wait for it to complete inline
self.start_job(meta, step);
self.maybe_wait_for_job(meta, step).unwrap()
})
}
// begin rendering the stimulus in the background
fn start_job(&mut self, meta: SimMeta<f32>, step: u64) {
// only one in-progress job allowed!
assert!(!self.outstanding.get());
self.outstanding.set(true);
self.ensure_worker();
self.worker.send((meta, step));
}
fn ensure_worker(&mut self) {
if self.worker.num_workers() != 0 {
return;
}
let stim = self.stim.clone();
let diag = self.diag.clone();
self.worker.spawn_worker(move |(meta, step)| {
let stim = diag.instrument_stimuli(|| {
let stim = stim.lock().unwrap();
let opt = stim.optimized_for(meta, step);
opt.as_ref().rendered(
meta.time_step().cast(),
// TODO: convert this to an integer
meta.time_step().cast::<R>() * R::from_primitive(step),
meta.feature_size().cast(),
meta.dim()
).into_owned()
//^ this 'into_owned' ought to be a no-op.
//^ it would only ever be borrowed if we accidentally called `rendered` twice.
});
(meta, step, stim)
});
}
}
impl<R, S, T: Pushable<S>> Pushable<S> for StimAccess<R, T> {
fn push(&mut self, meta: SimMeta<f32>, t: S) {
// invalidate any outstanding jobs (because the stimulus will have changed)
let _ = self.maybe_wait_for_job(Default::default(), 0);
self.stim.lock().unwrap().push(meta, t)
} }
} }

@@ -1,5 +1,5 @@
-use crate::geom::Vec2;
-use crate::real::Real;
+use coremem_cross::real::Real;
+use coremem_cross::vec::Vec2;
 use std::ops::Add;

@@ -6,8 +6,22 @@ mod units;
 pub use line::Line2d;
 pub use polygon::Polygon2d;
 pub use region::{
-    Cube, CylinderZ, Dilate, InvertedRegion, Memoize, Region, Sphere, Spiral, SwapXZ, SwapYZ, Torus, Translate, Union, WorldRegion, Wrap
+    Cube,
+    CylinderZ,
+    Dilate,
+    HasCrossSection,
+    InvertedRegion,
+    Memoize,
+    Region,
+    Sphere,
+    Spiral,
+    SwapXZ,
+    SwapYZ,
+    Torus,
+    Translate,
+    Union,
+    WorldRegion,
+    Wrap,
 };
 pub use units::{Coord, Meters, OrdMeters, Index};
-pub use coremem_types::vec::{Vec2, Vec3, Vec3u};

@@ -1,5 +1,6 @@
-use crate::geom::{Line2d, Vec2};
-use crate::real::Real;
+use crate::geom::Line2d;
+use coremem_cross::real::Real;
+use coremem_cross::vec::Vec2;
 
 #[derive(Clone, Debug, PartialEq)]
 pub struct Polygon2d<R> {

@@ -0,0 +1,97 @@
use crate::geom::Meters;
use super::{
and_not,
Cube,
HasCrossSection,
Intersection,
InvertedRegion,
Region,
Torus,
Union,
Union4,
};
use coremem_cross::vec::Vec3;
/// it's a torus, but elongated around its axis to resemble a pill shape.
///
/// ```notrust
/// _______
/// / \
/// | |
/// \_______/
/// ```
pub struct ElongatedTorus(Union4<
Intersection<Torus, InvertedRegion<Cube>>, // rounded top
Intersection<Torus, InvertedRegion<Cube>>, // rounded bottom
Cube, // left connection between top/bot
Cube, // right connection between top/bot
>);
impl ElongatedTorus {
pub fn new_xz(center: Meters, length: f32, major_rad: f32, minor_rad: f32) -> Self {
let body = Cube::new_centered(
center,
Meters::new(2.0 * (major_rad + minor_rad), 2.0 * minor_rad, length),
);
let top = and_not(
Torus::new_xz(
center + Meters::new(0.0, 0.0, 0.5 * length),
major_rad,
minor_rad,
),
body,
);
let bot = and_not(
Torus::new_xz(
center - Meters::new(0.0, 0.0, 0.5 * length),
major_rad,
minor_rad,
),
body,
);
// TODO: these should be cylinders
let left = Cube::new_centered(
center - Meters::new(major_rad, 0.0, 0.0),
Meters::new(2.0 * minor_rad, 2.0 * minor_rad, length),
);
let right = Cube::new_centered(
center + Meters::new(major_rad, 0.0, 0.0),
Meters::new(2.0 * minor_rad, 2.0 * minor_rad, length),
);
Self(Union::new4(
top,
bot,
left,
right,
))
}
}
impl Region for ElongatedTorus {
fn contains(&self, p: Meters) -> bool {
self.0.contains(p)
}
}
impl HasCrossSection for ElongatedTorus {
fn cross_section_normal(&self, p: Meters) -> Vec3<f32> {
let top = self.0.region0_of_4();
let bot = self.0.region1_of_4();
let right = self.0.region3_of_4();
let left = self.0.region2_of_4();
        let bridge_area =
            (right.x_range().end - right.x_range().start)
            * (right.y_range().end - right.y_range().start);
if top.contains(p) {
top.region0_of_2().cross_section_normal(p)
} else if bot.contains(p) {
bot.region0_of_2().cross_section_normal(p)
} else if right.contains(p) {
Vec3::new(0.0, 0.0, bridge_area)
} else if left.contains(p) {
Vec3::new(0.0, 0.0, -bridge_area)
} else {
Vec3::default()
}
}
}

@@ -1,31 +1,40 @@
 use crate::geom::{Coord, Meters, OrdMeters};
-use dyn_clone::{self, DynClone};
+use coremem_cross::vec::Vec3;
 use rayon::prelude::*;
 use serde::{Serialize, Deserialize};
 use std::collections::BTreeSet;
-use std::ops::Deref;
 use std::sync::Arc;
 
+mod constructed;
+pub use constructed::*;
 mod primitives;
 pub use primitives::*;
 
-#[typetag::serde(tag = "type")]
-pub trait Region: Send + Sync + DynClone {
+pub trait Region: Send + Sync {
     fn contains(&self, p: Meters) -> bool;
 }
-dyn_clone::clone_trait_object!(Region);
 
-pub fn and<T1: Region + 'static, T2: Region + 'static>(r1: T1, r2: T2) -> Intersection {
-    Intersection::new().and(r1).and(r2)
+/// some (volume) which has a tangent vector everywhere inside/on it.
+/// for example, a cylinder has tangents everywhere except its axis.
+/// the returned vector should represent the area of the cross section.
+pub trait HasCrossSection {
+    fn cross_section_normal(&self, p: Meters) -> Vec3<f32>;
 }
 
-pub fn and_not<T1: Region + 'static, T2: Region + 'static>(r1: T1, r2: T2) -> Intersection {
+pub fn and<T1: Region + 'static, T2: Region + 'static>(r1: T1, r2: T2) -> Intersection<T1, T2> {
+    Intersection::new2(r1, r2)
+}
+
+pub fn and_not<T1: Region + 'static, T2: Region + 'static>(r1: T1, r2: T2) -> Intersection<T1, InvertedRegion<T2>> {
     and(r1, InvertedRegion::new(r2))
 }
 
-pub fn union<T1: Region + 'static, T2: Region + 'static>(r1: T1, r2: T2) -> Union {
-    Union::new().with(r1).with(r2)
+pub fn union<T1: Region + 'static, T2: Region + 'static>(r1: T1, r2: T2) -> Union<T1, T2> {
+    Union::new2(r1, r2)
 }
/// returns true if there's a path (via the cardinal directions) from p0 to p1 within this region. /// returns true if there's a path (via the cardinal directions) from p0 to p1 within this region.
@@ -67,137 +76,179 @@ pub fn distance_to<R: Region, C: Coord>(r: &R, p0: C, p1: C, feat_size: f32) ->
 }
 /// Region describing the entire simulation space
-#[derive(Copy, Clone, Serialize, Deserialize)]
+#[derive(Copy, Clone, Default, Serialize, Deserialize)]
 pub struct WorldRegion;
-#[typetag::serde]
 impl Region for WorldRegion {
     fn contains(&self, _: Meters) -> bool {
         true
     }
 }
-#[derive(Clone, Serialize, Deserialize)]
-pub struct InvertedRegion(Box<dyn Region>);
-impl InvertedRegion {
-    pub fn new<R: Region + 'static>(r: R) -> Self {
-        Self(Box::new(r))
+#[derive(Clone, Default, Serialize, Deserialize)]
+pub struct InvertedRegion<R>(R);
+impl<R> InvertedRegion<R> {
+    pub fn new(r: R) -> Self {
+        Self(r)
     }
 }
-#[typetag::serde]
-impl Region for InvertedRegion {
+impl<R: Region> Region for InvertedRegion<R> {
     fn contains(&self, p: Meters) -> bool {
         !self.0.contains(p)
     }
 }
 #[derive(Clone, Default, Serialize, Deserialize)]
-pub struct Union(Vec<Box<dyn Region>>);
-impl Union {
-    pub fn new() -> Self {
-        Self(Vec::new())
+pub struct Union<R1, R2>(R1, R2);
+pub type Union3<R1, R2, R3> = Union<Union<R1, R2>, R3>;
+pub type Union4<R1, R2, R3, R4> = Union<Union3<R1, R2, R3>, R4>;
+impl<R1, R2> Union<R1, R2> {
+    pub fn with<R: Region>(self, r: R) -> Union<Self, R> {
+        Union::new2(self, r)
     }
-    pub fn new_with<R: Region + 'static>(r: R) -> Self {
-        Self::new().with(r)
+    pub fn new2(r1: R1, r2: R2) -> Self {
+        Self(r1, r2)
     }
-    pub fn with<R: Region + 'static>(self, r: R) -> Self {
-        self.with_box(Box::new(r))
+    pub fn new3<R3: Region>(r1: R1, r2: R2, r3: R3) -> Union<Self, R3> {
+        Union::new2(r1, r2).with(r3)
     }
-    pub fn with_box(mut self, r: Box<dyn Region>) -> Self {
-        self.0.push(r);
-        self
+    pub fn new4<R3: Region, R4: Region>(r1: R1, r2: R2, r3: R3, r4: R4) -> Union<Union<Self, R3>, R4> {
+        Union::new2(r1, r2).with(r3).with(r4)
     }
 }
-#[typetag::serde]
-impl Region for Union {
+impl<R0, R1> Union<R0, R1> {
+    pub fn region0_of_2(&self) -> &R0 {
+        &self.0
+    }
+    pub fn region1_of_2(&self) -> &R1 {
+        &self.1
+    }
+}
+impl<R0, R1, R2> Union3<R0, R1, R2> {
+    pub fn region0_of_3(&self) -> &R0 {
+        self.0.region0_of_2()
+    }
+    pub fn region1_of_3(&self) -> &R1 {
+        self.0.region1_of_2()
+    }
+    pub fn region2_of_3(&self) -> &R2 {
+        &self.1
+    }
+}
+impl<R0, R1, R2, R3> Union4<R0, R1, R2, R3> {
+    pub fn region0_of_4(&self) -> &R0 {
+        self.0.region0_of_3()
+    }
+    pub fn region1_of_4(&self) -> &R1 {
+        self.0.region1_of_3()
+    }
+    pub fn region2_of_4(&self) -> &R2 {
+        self.0.region2_of_3()
+    }
+    pub fn region3_of_4(&self) -> &R3 {
+        &self.1
+    }
+}
+impl<R1: Region, R2: Region> Region for Union<R1, R2> {
     fn contains(&self, p: Meters) -> bool {
-        self.0.iter().any(|r| r.contains(p))
+        self.0.contains(p) || self.1.contains(p)
     }
 }
-#[derive(Clone, Serialize, Deserialize)]
-pub struct Intersection(Vec<Box<dyn Region>>);
-impl Intersection {
-    pub fn new() -> Self {
-        Self(Vec::new())
+#[derive(Clone, Default, Serialize, Deserialize)]
+pub struct Intersection<R1, R2>(R1, R2);
+impl<R1, R2> Intersection<R1, R2> {
+    pub fn and<R3: Region>(self, r: R3) -> Intersection<Self, R3> {
+        Intersection::new2(self, r)
     }
-    pub fn new_with<R: Region + 'static>(r: R) -> Self {
-        Self::new().and(r)
+    pub fn new2(r1: R1, r2: R2) -> Self {
+        Self(r1, r2)
     }
-    pub fn and<R: Region + 'static>(self, r: R) -> Self {
-        self.and_box(Box::new(r))
+    pub fn new3<R3: Region>(r1: R1, r2: R2, r3: R3) -> Intersection<Self, R3> {
+        Intersection::new2(r1, r2).and(r3)
     }
-    pub fn and_box(mut self, r: Box<dyn Region>) -> Self {
-        self.0.push(r);
-        self
+    pub fn region0_of_2(&self) -> &R1 {
+        &self.0
+    }
+    pub fn region1_of_2(&self) -> &R2 {
+        &self.1
     }
 }
-#[typetag::serde]
-impl Region for Intersection {
+impl<R1: Region, R2: Region> Region for Intersection<R1, R2> {
     fn contains(&self, p: Meters) -> bool {
-        self.0.iter().all(|r| r.contains(p))
+        self.0.contains(p) && self.1.contains(p)
     }
 }
-#[derive(Clone, Serialize, Deserialize)]
-pub struct Translate {
-    inner: Box<dyn Region>,
+#[derive(Clone, Default, Serialize, Deserialize)]
+pub struct Translate<R> {
+    inner: R,
     shift: Meters,
 }
-impl Translate {
-    pub fn new<T: Region + 'static>(inner: T, shift: Meters) -> Self {
-        Self { inner: Box::new(inner), shift }
+impl<R> Translate<R> {
+    pub fn new(inner: R, shift: Meters) -> Self {
+        Self { inner, shift }
     }
 }
-#[typetag::serde]
-impl Region for Translate {
+impl<R> Deref for Translate<R> {
+    type Target = R;
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+impl<R: Region> Region for Translate<R> {
     fn contains(&self, p: Meters) -> bool {
         self.inner.contains(p - self.shift)
     }
 }
+impl<R: HasCrossSection> HasCrossSection for Translate<R> {
+    fn cross_section_normal(&self, p: Meters) -> Vec3<f32> {
+        self.inner.cross_section_normal(p - self.shift)
+    }
+}
-#[derive(Clone, Serialize, Deserialize)]
-pub struct SwapXZ {
-    inner: Box<dyn Region>,
-}
-impl SwapXZ {
-    pub fn new<T: Region + 'static>(inner: T) -> Self {
-        Self { inner: Box::new(inner) }
-    }
-}
-#[typetag::serde]
-impl Region for SwapXZ {
+#[derive(Clone, Default, Serialize, Deserialize)]
+pub struct SwapXZ<R> {
+    inner: R,
+}
+impl<R> SwapXZ<R> {
+    pub fn new(inner: R) -> Self {
+        Self { inner }
+    }
+}
+impl<R: Region> Region for SwapXZ<R> {
     fn contains(&self, p: Meters) -> bool {
         let p = Meters::new(p.z(), p.y(), p.x());
         self.inner.contains(p)
     }
 }
-#[derive(Clone, Serialize, Deserialize)]
-pub struct SwapYZ {
-    inner: Box<dyn Region>,
+#[derive(Clone, Default, Serialize, Deserialize)]
+pub struct SwapYZ<R> {
+    inner: R,
 }
-impl SwapYZ {
-    pub fn new<T: Region + 'static>(inner: T) -> Self {
-        Self { inner: Box::new(inner) }
+impl<R> SwapYZ<R> {
+    pub fn new(inner: R) -> Self {
+        Self { inner }
     }
 }
-#[typetag::serde]
-impl Region for SwapYZ {
+impl<R: Region> Region for SwapYZ<R> {
     fn contains(&self, p: Meters) -> bool {
         let mapped = Meters::new(p.x(), p.z(), p.y());
         self.inner.contains(mapped)
@@ -210,20 +261,20 @@ impl Region for SwapYZ {
 /// the resulting region is mapped onto the original region y=[0, y_max]. x is just the radius
 /// so that (0, 0) is mapped to (0, 0), and (1, 0) is mapped to (1, 0) and (0, 1) is mapped to
 /// (1, 0.5*y_max) and (-5, 0) is mapped to (5, 0.5*y_max).
-#[derive(Clone, Serialize, Deserialize)]
-pub struct Wrap {
-    inner: Box<dyn Region>,
+#[derive(Clone, Default, Serialize, Deserialize)]
+pub struct Wrap<R> {
+    inner: R,
     y_max: f32,
     about: Meters,
 }
-impl Wrap {
-    pub fn new<T: Region + 'static>(inner: T, y_max: f32) -> Self {
+impl<R> Wrap<R> {
+    pub fn new(inner: R, y_max: f32) -> Self {
         Self::new_about(inner, y_max, Meters::new(0.0, 0.0, 0.0))
     }
-    pub fn new_about<T: Region + 'static>(inner: T, y_max: f32, about: Meters) -> Self {
-        Self { inner: Box::new(inner), y_max, about }
+    pub fn new_about(inner: R, y_max: f32, about: Meters) -> Self {
+        Self { inner, y_max, about }
     }
     fn map(&self, p: Meters) -> Meters {
@@ -235,28 +286,26 @@ impl Wrap {
     }
 }
-#[typetag::serde]
-impl Region for Wrap {
+impl<R: Region> Region for Wrap<R> {
     fn contains(&self, p: Meters) -> bool {
         self.inner.contains(self.map(p))
     }
 }
-#[derive(Clone, Serialize, Deserialize)]
-pub struct Dilate {
-    inner: Box<dyn Region>,
+#[derive(Clone, Default, Serialize, Deserialize)]
+pub struct Dilate<R> {
+    inner: R,
     rad: f32,
     res: f32,
 }
-impl Dilate {
-    pub fn new<T: Region + 'static>(inner: T, rad: f32, res: f32) -> Self {
-        Self { inner: Box::new(inner), rad, res }
+impl<R> Dilate<R> {
+    pub fn new(inner: R, rad: f32, res: f32) -> Self {
+        Self { inner, rad, res }
     }
 }
-#[typetag::serde]
-impl Region for Dilate {
+impl<R: Region> Region for Dilate<R> {
     fn contains(&self, p: Meters) -> bool {
         let rad_iters = (self.rad / self.res).ceil() as i32;
         let rad_range = -rad_iters..=rad_iters;
@@ -279,24 +328,85 @@ impl Region for Dilate {
     }
 }
-#[derive(Clone, Serialize, Deserialize)]
-pub struct Memoize {
-    #[serde(skip)]
-    lut: Arc<dashmap::DashMap<OrdMeters, bool>>,
-    inner: Box<dyn Region>,
-}
-impl Memoize {
-    pub fn new<R: Region + 'static>(inner: R) -> Self {
+pub struct Rotate<R> {
+    region: R,
+    /// angle (radians) about the +x axis
+    about_x: f32,
+    /// angle (radians) about the +y axis
+    about_y: f32,
+    /// angle (radians) about the +z axis
+    about_z: f32,
+}
+impl<R> Rotate<R> {
+    pub fn about_x(about_x: f32, region: R) -> Self {
+        Self::about_x_y_z(about_x, 0.0, 0.0, region)
+    }
+    pub fn about_y(about_y: f32, region: R) -> Self {
+        Self::about_x_y_z(0.0, about_y, 0.0, region)
+    }
+    pub fn about_z(about_z: f32, region: R) -> Self {
+        Self::about_x_y_z(0.0, 0.0, about_z, region)
+    }
+    pub fn about_x_y_z(about_x: f32, about_y: f32, about_z: f32, region: R) -> Self {
+        Self {
+            region, about_x, about_y, about_z
+        }
+    }
+    fn rotate_into_region(&self, global: Vec3<f32>) -> Vec3<f32> {
+        global
+            .rotate_yz(-self.about_x)
+            .rotate_xz(-self.about_y)
+            .rotate_xy(-self.about_z)
+    }
+    fn rotate_out_of_region(&self, local: Vec3<f32>) -> Vec3<f32> {
+        local
+            .rotate_yz(self.about_x)
+            .rotate_xz(self.about_y)
+            .rotate_xy(self.about_z)
+    }
+}
+impl<R> Deref for Rotate<R> {
+    type Target = R;
+    fn deref(&self) -> &Self::Target {
+        &self.region
+    }
+}
+impl<R: Region> Region for Rotate<R> {
+    fn contains(&self, p: Meters) -> bool {
+        self.region.contains(Meters(self.rotate_into_region(p.0)))
+    }
+}
+impl<R: HasCrossSection> HasCrossSection for Rotate<R> {
+    fn cross_section_normal(&self, p: Meters) -> Vec3<f32> {
+        self.rotate_out_of_region(
+            self.region.cross_section_normal(
+                Meters(self.rotate_into_region(p.0))
+            )
+        )
+    }
+}
+#[derive(Clone, Default, Serialize, Deserialize)]
+pub struct Memoize<R> {
+    #[serde(skip)]
+    lut: Arc<dashmap::DashMap<OrdMeters, bool>>,
+    inner: R,
+}
+impl<R> Memoize<R> {
+    pub fn new(inner: R) -> Self {
         Self {
             lut: Arc::new(dashmap::DashMap::new()),
-            inner: Box::new(inner),
+            inner,
         }
     }
 }
-#[typetag::serde]
-impl Region for Memoize {
+impl<R: Region> Region for Memoize<R> {
     fn contains(&self, p: Meters) -> bool {
         *self.lut.entry(OrdMeters(p)).or_insert_with(|| self.inner.contains(p))
     }
@@ -307,7 +417,7 @@ mod test {
     use super::*;
     use float_eq::assert_float_eq;
-    fn assert_map(w: &Wrap, from: Meters, to: Meters) {
+    fn assert_map<R>(w: &Wrap<R>, from: Meters, to: Meters) {
         let mapped = w.map(from);
         assert_float_eq!(mapped.x(), to.x(), abs <= 0.01);
         assert_float_eq!(mapped.y(), to.y(), abs <= 0.01);
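The diff above replaces boxed trait objects (`Vec<Box<dyn Region>>` plus `typetag`) with composition encoded in the type itself, so `contains` dispatches statically. A self-contained sketch of that pattern, with simplified stand-in types (`Point`, `Sphere` here are illustrative, not the crate's actual API):

```rust
#[derive(Clone, Copy)]
struct Point { x: f32, y: f32, z: f32 }

trait Region {
    fn contains(&self, p: Point) -> bool;
}

// Composition lives in the type (e.g. Union<Sphere, Sphere>) instead of a
// Vec<Box<dyn Region>>, so calls can be inlined and no (de)serialization
// registry is needed.
struct Union<R1, R2>(R1, R2);
struct Intersection<R1, R2>(R1, R2);

impl<R1: Region, R2: Region> Region for Union<R1, R2> {
    fn contains(&self, p: Point) -> bool {
        self.0.contains(p) || self.1.contains(p)
    }
}
impl<R1: Region, R2: Region> Region for Intersection<R1, R2> {
    fn contains(&self, p: Point) -> bool {
        self.0.contains(p) && self.1.contains(p)
    }
}

struct Sphere { center: Point, rad: f32 }
impl Region for Sphere {
    fn contains(&self, p: Point) -> bool {
        let (dx, dy, dz) = (p.x - self.center.x, p.y - self.center.y, p.z - self.center.z);
        dx * dx + dy * dy + dz * dz < self.rad * self.rad
    }
}

fn main() {
    let origin = Point { x: 0.0, y: 0.0, z: 0.0 };
    let off = Point { x: 1.0, y: 0.0, z: 0.0 };
    // the composite's full shape is a compile-time type: Union<Sphere, Sphere>
    let either = Union(
        Sphere { center: origin, rad: 1.0 },
        Sphere { center: off, rad: 1.0 },
    );
    assert!(either.contains(Point { x: 1.5, y: 0.0, z: 0.0 }));
    assert!(!either.contains(Point { x: 3.0, y: 0.0, z: 0.0 }));
    // intersection keeps only the overlapping lens
    let both = Intersection(
        Sphere { center: origin, rad: 1.0 },
        Sphere { center: off, rad: 1.0 },
    );
    assert!(both.contains(Point { x: 0.5, y: 0.0, z: 0.0 }));
    println!("ok");
}
```

The accessor methods (`region0_of_2` etc.) exist because a nested composite no longer erases its parts: callers can still reach each leaf region by name.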


@@ -1,13 +1,14 @@
-use crate::geom::{Meters, Vec2, Vec3};
-use crate::real::Real as _;
+use crate::geom::Meters;
+use coremem_cross::real::Real as _;
+use coremem_cross::vec::{Vec2, Vec3};
 use serde::{Serialize, Deserialize};
 use std::fmt::{self, Display};
 use std::ops::Range;
-use super::Region;
+use super::{HasCrossSection, Region};
-#[derive(Copy, Clone, Serialize, Deserialize)]
+#[derive(Copy, Clone, Default, Serialize, Deserialize)]
 pub struct CylinderZ {
     center: Vec2<f32>,
     radius: f32,
@@ -23,7 +24,6 @@ impl CylinderZ {
     }
 }
-#[typetag::serde]
 impl Region for CylinderZ {
     fn contains(&self, p: Meters) -> bool {
         p.xy().distance_sq(self.center) <= self.radius * self.radius
@@ -36,6 +36,31 @@
     }
 }
+/// describes all 3d space which falls within a given angular space, relative to the Z axis.
+#[derive(Copy, Clone, Default, Serialize, Deserialize)]
+pub struct WedgeZ {
+    arg_min: f32,
+    arg_max: f32,
+}
+impl WedgeZ {
+    pub fn new(arg_min: f32, arg_max: f32) -> Self {
+        Self { arg_min, arg_max }
+    }
+}
+impl Region for WedgeZ {
+    fn contains(&self, p: Meters) -> bool {
+        let arg = p.xy().arg();
+        // arg is [-pi, pi).
+        // if the user supplied some desired range where arg_max > pi, then we need to rotate
+        // one revolution "into" that range.
+        let arg_next = arg + f32::two_pi();
+        (arg >= self.arg_min && arg <= self.arg_max) ||
+            (arg_next >= self.arg_min && arg_next <= self.arg_max)
+    }
+}
 #[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
 pub struct Torus {
     center: Meters,
@@ -76,7 +101,6 @@ impl Torus {
     }
 }
-#[typetag::serde]
 impl Region for Torus {
     fn contains(&self, p: Meters) -> bool {
         // a torus is the set of all points < distance `r` from the circle of radius `R`,
@@ -86,7 +110,7 @@ impl Region for Torus {
         // 2. Find the point `q` on the circle which is nearest to `p`.
         // 3. Consider the distance from `p` to `q`.
         let rel_p = *p - *self.center;
-        let p_on_plane = rel_p - self.normal.with_mag(self.normal.dot(rel_p));
+        let p_on_plane = rel_p - self.normal.with_mag(self.normal.dot(rel_p)).unwrap();
         let q = if p_on_plane == Vec3::zero() {
             // avoid division by zero.
             // The point is precisely on the axis of the torus.
@@ -94,16 +118,25 @@ impl Region for Torus {
             // and they all give the same answer.
             // Such a point is given by rotating the normal axis by 90 degrees in ANY DIRECTION
             let off_axis = self.normal.arbitrary_orthogonal_vector();
-            off_axis.with_mag(self.major_rad)
+            off_axis.with_mag(self.major_rad).unwrap()
         } else {
-            p_on_plane.with_mag(self.major_rad)
+            p_on_plane.with_mag(self.major_rad).unwrap()
         };
         let distance_to_circle_sq = rel_p.distance_sq(q);
         distance_to_circle_sq < self.minor_rad * self.minor_rad
     }
 }
-#[derive(Copy, Clone, Serialize, Deserialize)]
+impl HasCrossSection for Torus {
+    fn cross_section_normal(&self, coord: Meters) -> Vec3<f32> {
+        let axis = self.axis();
+        let to_coord = *coord - *self.center();
+        // this creates a normal which always points "counter-clockwise" along the shape
+        axis.cross(to_coord).with_mag(self.cross_section()).unwrap_or_default()
+    }
+}
+#[derive(Copy, Clone, Default, Serialize, Deserialize)]
 pub struct Sphere {
     center: Meters,
     rad: f32,
@@ -118,7 +151,6 @@ impl Sphere {
     }
 }
-#[typetag::serde]
 impl Region for Sphere {
     fn contains(&self, p: Meters) -> bool {
         p.distance_sq(*self.center) < self.rad * self.rad
@@ -227,7 +259,6 @@ impl Cube {
     }
 }
-#[typetag::serde]
 impl Region for Cube {
     fn contains(&self, p: Meters) -> bool {
         self.x_range().contains(&p.x()) &&
@@ -237,7 +268,7 @@
 }
 /// a Spiral traces out a circle on the xy plane as z increases.
-#[derive(Copy, Clone, Serialize, Deserialize)]
+#[derive(Copy, Clone, Default, Serialize, Deserialize)]
 pub struct Spiral {
     /// radius of the spiral
     major: f32,
@@ -256,7 +287,6 @@ impl Spiral {
     }
 }
-#[typetag::serde]
 impl Region for Spiral {
     fn contains(&self, p: Meters) -> bool {
         let revs = p.z() / self.period;
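The `WedgeZ` region added in this file tests the point's polar angle against the requested range twice: once as returned by `atan2`, and once shifted up by a full revolution, so ranges like `[pi/2, 3*pi/2]` that cross the branch cut still work. A standalone sketch of just that wraparound check (plain `f32` math, `wedge_contains` is a hypothetical helper, not the crate's API):

```rust
use std::f32::consts::PI;

// Mirrors WedgeZ::contains: atan2 yields an angle near [-pi, pi], so a range
// whose arg_max exceeds pi must also be tested after rotating the point's
// angle one revolution "into" that range.
fn wedge_contains(arg_min: f32, arg_max: f32, x: f32, y: f32) -> bool {
    let arg = y.atan2(x);
    let arg_next = arg + 2.0 * PI;
    (arg >= arg_min && arg <= arg_max) || (arg_next >= arg_min && arg_next <= arg_max)
}

fn main() {
    // wedge covering the left half-plane, expressed as [pi/2, 3*pi/2]
    let (lo, hi) = (0.5 * PI, 1.5 * PI);
    assert!(wedge_contains(lo, hi, -1.0, 0.1));  // arg just below pi: direct hit
    assert!(wedge_contains(lo, hi, -1.0, -0.1)); // arg just above -pi: caught via arg_next
    assert!(!wedge_contains(lo, hi, 1.0, 0.0));  // arg = 0: outside the wedge
    println!("ok");
}
```

Without the `arg_next` branch, the second point above would be rejected even though it lies inside the wedge.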


@@ -1,6 +1,6 @@
-use crate::real::ToFloat;
+use coremem_cross::real::ToFloat;
+use coremem_cross::vec::{Vec3, Vec3u};
 use serde::{Serialize, Deserialize};
-use super::{Vec3, Vec3u};
 use std::fmt::{self, Display};
 use std::cmp::Ordering;
 use std::ops::{Add, Deref, Div, Mul, Neg, Sub};
@@ -188,6 +188,17 @@ impl Index {
     }
 }
+impl Into<Vec3u> for Index {
+    fn into(self) -> Vec3u {
+        self.0
+    }
+}
+impl From<Vec3u> for Index {
+    fn from(v: Vec3u) -> Self {
+        Self(v)
+    }
+}
 impl Coord for Index {
     fn to_meters(&self, feature_size: f32) -> Meters {
         Meters(Vec3::from(self.0) * feature_size)


@@ -7,19 +7,20 @@
 use log::info;
+mod diagnostics;
 pub mod driver;
 pub mod geom;
-pub mod mat;
 pub mod meas;
 pub mod render;
 pub mod sim;
 pub mod stim;
-pub mod util;
+pub mod worker;
 pub use driver::*;
-pub use mat::*;
 pub use sim::*;
-pub use coremem_types::real;
+pub use coremem_cross as cross;
+pub use coremem_cross::real;
+pub use coremem_cross::mat;
 // Some things to keep in mind:
 // B = mu_r*H + M


@@ -1,362 +0,0 @@
use crate::CellState;
use crate::geom::{Line2d, Vec2, Vec3, Polygon2d};
use crate::mat::Material;
use crate::real::Real;
use crate::sim::StepParametersMut;
use lazy_static::lazy_static;
use log::trace;
use serde::{Serialize, Deserialize};
use std::any::{Any, TypeId};
use std::cmp::Ordering;
use std::collections::HashMap;
use std::sync::Mutex;
fn step_linear_ferro<R: Real>(m_mut: &mut Vec3<R>, mh_curve: &MHCurve<R>, context: &CellState<R>, delta_b: Vec3<R>) {
trace!("step_b enter");
let (h, m) = (context.h(), *m_mut);
let target_hm = h + m + delta_b * R::mu0_inv();
// TODO: this is probably not the best way to generalize a BH curve into 3d.
let (_hx, mx) = mh_curve.move_to(
h.x(),
m.x(),
target_hm.x(),
);
let (_hy, my) = mh_curve.move_to(
h.y(),
m.y(),
target_hm.y(),
);
let (_hz, mz) = mh_curve.move_to(
h.z(),
m.z(),
target_hm.z(),
);
*m_mut = Vec3::new(mx, my, mz);
// let ret = Vec3::new(hx, hy, hz);
trace!("step_b end");
}
/// M as a function of H
#[derive(Clone, PartialEq)]
struct MHCurve<R> {
geom: Polygon2d<R>,
}
#[allow(unused)]
impl<R: Real> MHCurve<R> {
/// Construct a M(H) curve from a sweep from M = 0 to Ms and back down to M = 0.
/// The curve below M = 0 is derived by symmetry.
fn new<R2: Real>(points: &[Vec2<R2>]) -> Self {
let full_pts: Vec<_> =
points.iter().cloned()
.chain(points.iter().cloned().map(|p| -p))
.map(|p| p.cast())
.collect();
Self {
geom: Polygon2d::new(full_pts)
}
}
fn from_bh<R2: Real>(points: &[(R2, R2)]) -> Self {
let mh_points: Vec<_> = points.iter().cloned().map(|(h, b)| {
Vec2::new(h, b / R2::mu0() - h)
}).collect();
Self::new(&*mh_points)
}
fn from_mh<R2: Real>(points: &[(R2, R2)]) -> Self {
let mh_points: Vec<_> = points.iter().cloned().map(|(h, m)| {
Vec2::new(h, m)
}).collect();
Self::new(&*mh_points)
}
/// Return (Hmax, Mmax)
pub fn extremes(&self) -> Vec2<R> {
Vec2::new(self.geom.max_x(), self.geom.max_y())
}
/// Moves (h, m) towards some location in the MH curve where H + M = target_hm.
/// Returns `Ok((h, m))` if complete; `Err((h, m))` if there's more work to be done (call it
/// again).
fn step_toward(&self, h: R, m: R, target_hm: R) -> Result<Vec2<R>, Vec2<R>> {
let is_ascending = match target_hm.partial_cmp(&(h + m)).unwrap_or_else(|| panic!("{} {}", h, m)) {
Ordering::Greater => true,
Ordering::Less => false,
_ => return Ok(Vec2::new(h, m))
};
if (is_ascending && m == self.geom.max_y()) || (!is_ascending && m == self.geom.min_y()) {
// Fully saturated. m is fixed, while h moves freely
return Ok(Vec2::new(target_hm - m, m));
}
// Locate the segment which would contain the current point
let mut segments = self.geom.segments();
let active_segment = loop {
let line = segments.next().unwrap_or_else(|| {
panic!("failed to find segment for h:{}, m:{}, {:?}", h, m, self.geom.segments().collect::<Vec<_>>());
});
if line.contains_y(m) && line.is_ascending() == is_ascending {
if line.contains_x(h) && line.distance_sq(Vec2::new(h, m)) < R::from_primitive(1.0e-6) {
// (h, m) resides on this line
break line;
} else {
// need to move the point toward this line
let h_intercept = line.x(m);
break Line2d::new(Vec2::new(h, m), Vec2::new(h_intercept, m));
}
}
};
trace!("active segment: {:?}", active_segment);
// Find some m(h) on the active_segment such that sum(h) = h + m(h) = target_hm
let sum_h = active_segment + Line2d::new(Vec2::zero(), Vec2::unit());
trace!("sum_h: {:?}", sum_h);
let new_h = if sum_h.to().y() != sum_h.from().y() {
sum_h.move_toward_y_unclamped(h, target_hm)
} else {
// avoid a division-by-zero.
// We could be anywhere along this line, but we prefer the endpoint
// so as to escape out of any permanent loops
active_segment.to().x()
};
trace!("new_h: {}", new_h);
if sum_h.contains_x(new_h) {
// the segment contains a point with the target H+M
Ok(active_segment.at_x(new_h))
} else {
// the segment doesn't contain the desired point: clamp and try the next segment
Err(active_segment.clamp_by_x(new_h))
}
}
fn move_to(&self, mut h: R, mut m: R, target_hm: R) -> (R, R) {
let mut i = 0;
loop {
i += 1;
match self.step_toward(h, m, target_hm) {
Ok(v) => break (v.x(), v.y()),
Err(v) => {
h = v.x();
m = v.y();
},
}
if i % 2048 == 0 {
panic!("unusually high iteration count without converging: {}. args: {}, {}, {}", i, h, m, target_hm);
}
}
}
}
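`move_to` above is a bounded fixed-point iteration: it keeps applying `step_toward` while it returns `Err(intermediate)`, stops on `Ok(converged)`, and panics loudly if the iteration count blows up (the symptom of the float-rounding regression tested below). The control-flow pattern, distilled into a self-contained sketch (`iterate_until` is a hypothetical helper, not part of the crate):

```rust
// Apply `step` until it reports convergence (Ok), threading the intermediate
// state (Err) back in, with a hard cap to turn non-convergence into a loud
// failure instead of an infinite loop.
fn iterate_until<T, F>(mut state: T, max_iters: u32, mut step: F) -> T
where
    F: FnMut(T) -> Result<T, T>, // Ok = converged, Err = keep going
{
    for _ in 0..max_iters {
        match step(state) {
            Ok(done) => return done,
            Err(next) => state = next,
        }
    }
    panic!("unusually high iteration count without converging");
}

fn main() {
    // toy example: walk an integer toward 10 one unit at a time
    let result = iterate_until(0i32, 2048, |x| if x >= 10 { Ok(x) } else { Err(x + 1) });
    assert_eq!(result, 10);
    println!("ok");
}
```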
#[derive(Default, Copy, Clone, PartialEq, Serialize, Deserialize)]
pub struct Ferroxcube3R1<R> {
m: Vec3<R>,
}
impl<R: Real> Ferroxcube3R1<R> {
pub fn new() -> Self {
Self::default()
}
}
impl<R: Real> Ferroxcube3R1<R> {
fn curve() -> &'static MHCurve<R> {
lazy_static! {
static ref CURVES: Mutex<HashMap<TypeId, Box<dyn Any + Send>>> = Mutex::new(HashMap::new());
}
let mut lock = CURVES.lock().unwrap();
let curve = lock.entry(TypeId::of::<R>()).or_insert_with(|| {
Box::new(MHCurve::<R>::from_bh(&[
( 35.0, 0.0),
( 50.0, 0.250),
( 100.0, 0.325),
( 200.0, 0.350),
(1000.0, 0.390),
// Falling
( 200.0, 0.360),
( 100.0, 0.345),
( 50.0, 0.340),
( 0.0, 0.325),
]))
}).downcast_ref::<MHCurve<R>>().unwrap();
unsafe { std::mem::transmute::<&MHCurve<R>, &'static MHCurve<R>>(curve) }
}
}
impl<R: Real> Material<R> for Ferroxcube3R1<R> {
fn step_b(&mut self, context: &CellState<R>, delta_b: Vec3<R>) {
step_linear_ferro(&mut self.m, Self::curve(), context, delta_b)
}
fn m(&self) -> Vec3<R> {
self.m
}
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
StepParametersMut::default().with_conductivity(Vec3::uniform(1e-3))
}
}
/// Simple, square-loop ferrite
#[derive(Default, Copy, Clone, PartialEq, Serialize, Deserialize)]
pub struct MinimalSquare<R> {
m: Vec3<R>,
}
impl<R: Real> MinimalSquare<R> {
fn curve() -> &'static MHCurve<R> {
lazy_static! {
static ref CURVES: Mutex<HashMap<TypeId, Box<dyn Any + Send>>> = Mutex::new(HashMap::new());
}
let mut lock = CURVES.lock().unwrap();
let curve = lock.entry(TypeId::of::<R>()).or_insert_with(|| {
Box::new(MHCurve::<R>::from_bh(&[
( 1.0, 0.0),
( 2.0, 1000000.0),
// Falling
( 0.0, 900000.0),
]))
}).downcast_ref::<MHCurve<R>>().unwrap();
unsafe { std::mem::transmute::<&MHCurve<R>, &'static MHCurve<R>>(curve) }
}
}
impl<R: Real> Material<R> for MinimalSquare<R> {
fn step_b(&mut self, context: &CellState<R>, delta_b: Vec3<R>) {
step_linear_ferro(&mut self.m, Self::curve(), context, delta_b)
}
fn m(&self) -> Vec3<R> {
self.m
}
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
StepParametersMut::default().with_conductivity(Vec3::uniform(1e-3))
}
}
#[cfg(test)]
mod test {
use super::*;
fn mh_curve_for_test() -> MHCurve<f32> {
MHCurve::new(&[
// rising
Vec2::new( 10.0, 0.0),
Vec2::new( 20.0, 100.0),
Vec2::new( 30.0, 150.0),
// falling
Vec2::new( 0.0, 120.0),
// negative rising
Vec2::new(-10.0, 0.0),
Vec2::new(-20.0, -100.0),
Vec2::new(-30.0, -150.0),
// negative falling
Vec2::new( 0.0, -120.0),
])
}
fn assert_step_toward_symmetric(h: f32, m: f32, target_mh: f32, target: Result<Vec2<f32>, Vec2<f32>>) {
let curve = mh_curve_for_test();
let target = match target {
Ok(v) => Ok(v),
Err(v) => Err(v),
};
let neg_target = match target {
Ok(v) => Ok(-v),
Err(v) => Err(-v),
};
assert_eq!(curve.step_toward(h, m, target_mh), target);
assert_eq!(curve.step_toward(-h, -m, -target_mh), neg_target);
}
fn assert_move_to_symmetric(h: f32, m: f32, target_mh: f32, target: (f32, f32)) {
let curve = mh_curve_for_test();
assert_eq!(curve.move_to(h, m, target_mh), target);
assert_eq!(curve.move_to(-h, -m, -target_mh), (-target.0, -target.1));
}
#[test]
fn mh_curve_move_from_inner_to_inner() {
assert_step_toward_symmetric(0.0, 0.0, 5.0, Ok(Vec2::new(5.0, 0.0)));
assert_step_toward_symmetric(0.0, 5.0, 10.0, Ok(Vec2::new(5.0, 5.0)));
assert_step_toward_symmetric(-5.0, 5.0, -3.0, Ok(Vec2::new(-8.0, 5.0)));
assert_step_toward_symmetric(-5.0, 5.0, 7.0, Ok(Vec2::new(2.0, 5.0)));
assert_step_toward_symmetric(5.0, -5.0, -3.0, Ok(Vec2::new(2.0, -5.0)));
assert_step_toward_symmetric(5.0, -5.0, 3.0, Ok(Vec2::new(8.0, -5.0)));
}
#[test]
fn mh_curve_magnetize_along_edge() {
// start of segment NOOP
assert_step_toward_symmetric(10.0, 0.0, 10.0, Ok(Vec2::new(10.0, 0.0)));
// start of segment to middle of segment
assert_step_toward_symmetric(10.0, 0.0, 32.0, Ok(Vec2::new(12.0, 20.0)));
// middle of segment NOOP
assert_step_toward_symmetric(12.0, 20.0, 32.0, Ok(Vec2::new(12.0, 20.0)));
// middle of segment to middle of segment
assert_step_toward_symmetric(12.0, 20.0, 54.0, Ok(Vec2::new(14.0, 40.0)));
// middle of segment to end of segment
assert_step_toward_symmetric(12.0, 20.0, 120.0, Err(Vec2::new(20.0, 100.0)));
}
#[test]
fn mh_curve_demagnetize_along_edge() {
// start of segment NOOP
assert_step_toward_symmetric(30.0, 150.0, 180.0, Ok(Vec2::new(30.0, 150.0)));
// start of segment to middle of segment
assert_step_toward_symmetric(30.0, 150.0, 160.0, Ok(Vec2::new(20.0, 140.0)));
// middle of segment NOOP
assert_step_toward_symmetric(20.0, 140.0, 160.0, Ok(Vec2::new(20.0, 140.0)));
// middle of segment to middle of segment
assert_step_toward_symmetric(20.0, 140.0, 140.0, Ok(Vec2::new(10.0, 130.0)));
// middle of segment to end of segment
assert_step_toward_symmetric(20.0, 140.0, 120.0, Err(Vec2::new(0.0, 120.0)));
}
#[test]
fn mh_curve_magnetize_across_edges() {
// Rising from start to middle
assert_move_to_symmetric(10.0, 0.0, 132.0, (22.0, 110.0));
// Rising from start to saturation
assert_move_to_symmetric(10.0, 0.0, 180.0, (30.0, 150.0));
// Rising from start to post-saturation
assert_move_to_symmetric(10.0, 0.0, 400.0, (250.0, 150.0));
// Rising from negative saturation to start
assert_move_to_symmetric(-30.0, -150.0, 10.0, (10.0, 0.0));
// Rising from negative post-saturation to start
assert_move_to_symmetric(-250.0, -150.0, 10.0, (10.0, 0.0));
// Rising from negative middle to middle
assert_move_to_symmetric(-22.0, -110.0, 132.0, (22.0, 110.0));
}
#[test]
fn mh_curve_demagnetize_across_edges() {
// Falling from saturation to start
assert_move_to_symmetric(30.0, 150.0, 120.0, (0.0, 120.0));
// Falling from post-saturation to post-saturation
assert_move_to_symmetric(250.0, 150.0, 200.0, (50.0, 150.0));
// Falling from post-saturation to saturation
assert_move_to_symmetric(250.0, 150.0, 180.0, (30.0, 150.0));
// Falling from post-saturation to start
assert_move_to_symmetric(250.0, 150.0, 120.0, (0.0, 120.0));
// Falling from post-saturation to negative saturation
assert_move_to_symmetric(250.0, 150.0, -180.0, (-30.0, -150.0));
// Falling from post-saturation to negative post-saturation
assert_move_to_symmetric(250.0, 150.0, -400.0, (-250.0, -150.0));
// Falling from interior to middle
assert_move_to_symmetric(28.0, 130.0, 140.0, (10.0, 130.0));
// Falling from interior to middle
assert_move_to_symmetric(28.0, 130.0, 130.0, (5.0, 125.0));
}
/// Float rounding would cause `inf`s, which manifested as infinite looping.
#[test]
fn regression_no_convergence_3r1() {
let curve = Ferroxcube3R1::curve();
curve.move_to(-202.04596, -278400.53, -278748.66);
}
}


@@ -1,35 +0,0 @@
//! database of common materials
use crate::geom::Vec3;
use crate::mat::{AnisomorphicConductor, IsomorphicConductor, LinearMagnet, Ferroxcube3R1, MinimalSquare};
use crate::real::Real;
pub fn conductor<R: Real, R2: Real>(conductivity: R2) -> IsomorphicConductor<R> {
IsomorphicConductor::new(conductivity.cast())
}
pub fn anisotropic_conductor<R>(conductivity: Vec3<R>) -> AnisomorphicConductor<R> {
AnisomorphicConductor::new(conductivity)
}
pub fn copper<R: Real>() -> IsomorphicConductor<R> {
conductor(50_000_000.0)
}
// See https://en.wikipedia.org/wiki/Permeability_(electromagnetism)#Values_for_some_common_materials
/// This is a simplified form of iron annealed in H.
pub fn linear_annealed_iron<R: Real>() -> LinearMagnet<R> {
LinearMagnet::new(200_000.0)
}
/// This is a simplified form of iron
pub fn linear_iron<R: Real>() -> LinearMagnet<R> {
LinearMagnet::new(5000.0)
}
/// https://www.ferroxcube.com/upload/media/product/file/MDS/3r1.pdf
pub fn ferroxcube_3r1<R: Real>() -> Ferroxcube3R1<R> {
Ferroxcube3R1::default()
}
pub fn minimal_square_ferrite<R: Real>() -> MinimalSquare<R> {
MinimalSquare::default()
}


@@ -1,101 +0,0 @@
use crate::CellState;
use crate::geom::Vec3;
use crate::mat::Material;
use crate::real::Real;
use serde::{Serialize, Deserialize};
/// Material which can be magnetized, but has no hysteresis and no coercivity.
#[derive(Copy, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct LinearMagnet<R> {
/// \mu_r
relative_permeability: Vec3<R>,
m: Vec3<R>,
}
impl<R: Real> LinearMagnet<R> {
pub fn new<R2: Real>(relative_permeability: R2) -> Self {
Self {
relative_permeability: Vec3::uniform(relative_permeability).cast(),
m: Vec3::zero(),
}
}
pub fn new_anisotropic<R2: Real>(relative_permeability: Vec3<R2>) -> Self {
Self {
relative_permeability: relative_permeability.cast(),
m: Vec3::zero()
}
}
}
impl<R: Real> Material<R> for LinearMagnet<R> {
fn m(&self) -> Vec3<R> {
self.m
}
fn step_b(&mut self, _context: &CellState<R>, delta_b: Vec3<R>) {
//```tex
// $B = \mu_0 (H + M) = \mu_0 \mu_r H$
// $\mu_r H = H + M$
// $M = (\mu_r - 1) H$
// $B = \mu_0 (1/(\mu_r - 1) M + M)$
// $B = \mu_0 \mu_r/(\mu_r - 1) M$
//```
let mu_r = self.relative_permeability;
let delta_m = (delta_b*R::mu0_inv()).elem_mul(mu_r - Vec3::unit()).elem_div(mu_r);
self.m += delta_m;
}
}
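The comment block in `step_b` derives delta_M = (delta_B / mu0) * (mu_r - 1) / mu_r. A standalone numeric check of that relation, in plain f64 and independent of the crate's `Vec3`/`Real` types (mu0 here is the usual vacuum permeability, 4*pi*1e-7):

```rust
fn main() {
    let mu0 = 4.0 * std::f64::consts::PI * 1e-7; // vacuum permeability, H/m
    let mu_r = 5000.0; // relative permeability, as in LinearMagnet::new(5000.0)
    let delta_b = 1.0; // tesla
    // delta_M = (delta_B / mu0) * (mu_r - 1) / mu_r
    let delta_m = delta_b / mu0 * (mu_r - 1.0) / mu_r;
    // agrees with the `linear_magnet_steep` test below: M grows by ~795615.56 A/m per tesla
    assert!((delta_m - 795615.56).abs() < 1.0);
    println!("delta_m = {delta_m:.2}");
}
```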
#[cfg(test)]
mod test {
use super::*;
use float_eq::assert_float_eq;
#[test]
fn linear_magnet_steep() {
let mut mag = LinearMagnet::<f64>::new(5000.0);
// M = B/mu0 * (mu_r-1)/(mu_r)
mag.step_b(&CellState::default(), Vec3::uniform(1.0));
assert_float_eq!(mag.m().x(), 795615.56, abs <= 1.0);
mag.step_b(&CellState::default(), Vec3::uniform(1.0));
assert_float_eq!(mag.m().x(), 1591231.12, abs <= 1.0);
mag.step_b(&CellState::default(), Vec3::uniform(-1.0));
assert_float_eq!(mag.m().x(), 795615.56, abs <= 1.0);
mag.step_b(&CellState::default(), Vec3::uniform(-1.0));
assert_float_eq!(mag.m().x(), 0.0, abs <= 1.0);
}
#[test]
fn linear_magnet_shallow() {
let mut mag = LinearMagnet::<f64>::new(2.0);
mag.step_b(&CellState::default(), Vec3::uniform(1.0));
assert_float_eq!(mag.m().x(), 397887.36, abs <= 1.0);
mag.step_b(&CellState::default(), Vec3::uniform(-3.0));
assert_float_eq!(mag.m().x(), -795774.72, abs <= 1.0);
}
#[test]
fn linear_magnet_accuracy() {
let mut mag = LinearMagnet::<f32>::new(5000.0);
let mut b = Vec3::zero();
while b.x() < 1.0 {
let delta_b = Vec3::uniform(0.00002);
mag.step_b(&CellState::default(), delta_b);
b += delta_b;
}
while b.x() > 0.0 {
let delta_b = Vec3::uniform(-0.00001);
mag.step_b(&CellState::default(), delta_b);
b += delta_b;
}
// TODO: This error is WAY too big!
// Need to make sure that M + H == B/mu0 always
assert_float_eq!(mag.m().x(), b.x() * f32::mu0_inv(), abs <= 900.0);
}
}

View File

@@ -1,195 +0,0 @@
use crate::geom::{Line2d, Vec2, Vec3};
use crate::mat::Material;
use crate::real::Real;
use crate::sim::CellState;
use serde::{Serialize, Deserialize};
/// M(B) parallelogram
///
///```text
/// ____________
/// / /
/// / . /
/// / /
/// /___________/
/// ```
///
/// The `.` depicts (0, 0). X axis is B; y axis is M.
/// As B increases, M remains constant until it hits an edge.
/// Then M rises up to its max.
/// Same thing happens on the left edge, as B decreases and M falls to its min.
#[derive(Default, Copy, Clone, PartialEq, Serialize, Deserialize)]
pub struct MBPgram<R> {
/// Vertical range of the graph
pub max_m: R,
/// X coordinate at which the upward slope starts
pub b_start: R,
/// X coordinate at which the upward slope ends
pub b_end: R,
}
impl<R: Real> MBPgram<R> {
pub fn new(b_start: R, b_end: R, max_m: R) -> Self {
Self { b_start, b_end, max_m }
}
/// Return the new `M`
pub fn move_b(&self, m: R, target_b: R) -> R {
let right_edge = Line2d::new(
Vec2::new(self.b_start, -self.max_m),
Vec2::new(self.b_end, self.max_m),
);
let left_edge = Line2d::new(
Vec2::new(-self.b_start, self.max_m),
Vec2::new(-self.b_end, -self.max_m),
);
// m must be at least this much:
let min_m = right_edge.clamp_by_x(target_b).y();
// m must be no more than this:
let max_m = left_edge.clamp_by_x(target_b).y();
m.max_or_undefined(min_m).min_or_undefined(max_m)
}
}
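The clamp in `move_b` can be exercised standalone. A scalar sketch, with `edge_y` as a hypothetical stand-in for the crate's `Line2d::clamp_by_x(..).y()`:

```rust
// standalone sketch of the M(B) parallelogram clamp, using plain f64
// in place of the crate's generic Real and Line2d types.
struct Pgram { b_start: f64, b_end: f64, max_m: f64 }

impl Pgram {
    // y of the segment (x0,y0)-(x1,y1) at x, clamped to the segment's ends
    fn edge_y(x0: f64, y0: f64, x1: f64, y1: f64, x: f64) -> f64 {
        let t = ((x - x0) / (x1 - x0)).clamp(0.0, 1.0);
        y0 + t * (y1 - y0)
    }
    fn move_b(&self, m: f64, target_b: f64) -> f64 {
        // m must be at least the right edge's height at target_b...
        let min_m = Self::edge_y(self.b_start, -self.max_m, self.b_end, self.max_m, target_b);
        // ...and no more than the left edge's height there
        let max_m = Self::edge_y(-self.b_end, -self.max_m, -self.b_start, self.max_m, target_b);
        m.max(min_m).min(max_m)
    }
}

fn main() {
    let curve = Pgram { b_start: 4.0, b_end: 6.0, max_m: 20.0 };
    assert_eq!(curve.move_b(0.0, 2.0), 0.0);    // interior: M unchanged
    assert_eq!(curve.move_b(0.0, 6.0), 20.0);   // right edge: pushed to max
    assert_eq!(curve.move_b(20.0, -4.5), 10.0); // left edge: pulled down
}
```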
#[derive(Default, Copy, Clone, PartialEq, Serialize, Deserialize)]
pub struct NativeMBFerromagnet<R> {
m: Vec3<R>,
curve: MBPgram<R>,
}
impl<R: Real> NativeMBFerromagnet<R> {
pub fn new<R2: Real>(b_start: R2, b_end: R2, max_m: R2) -> Self {
Self {
m: Vec3::zero(),
curve: MBPgram::new(b_start.cast(), b_end.cast(), max_m.cast()),
}
}
pub fn curve(&self) -> MBPgram<R> {
self.curve
}
}
impl<R: Real> Material<R> for NativeMBFerromagnet<R> {
fn step_b(&mut self, context: &CellState<R>, delta_b: Vec3<R>) {
let target_b = context.with_m(self.m).b() + delta_b;
// println!("step_b {}", target_b);
self.m = Vec3::new(
self.curve.move_b(self.m.x(), target_b.x()),
self.curve.move_b(self.m.y(), target_b.y()),
self.curve.move_b(self.m.z(), target_b.z()),
);
}
fn m(&self) -> Vec3<R> {
self.m
}
}
#[derive(Default, Copy, Clone, PartialEq, Serialize, Deserialize)]
pub struct SpirvMBFerromagnet<R>(NativeMBFerromagnet<R>);
impl<R: Real> SpirvMBFerromagnet<R> {
pub fn new<R2: Real>(b_start: R2, b_end: R2, max_m: R2) -> Self {
Self(NativeMBFerromagnet::new(b_start, b_end, max_m))
}
pub fn curve(&self) -> MBPgram<R> {
self.0.curve()
}
}
impl<R: Real> Material<R> for SpirvMBFerromagnet<R> {
fn step_b(&mut self, context: &CellState<R>, delta_b: Vec3<R>) {
let target_b = context.with_m(self.m()).b() + delta_b;
let curve = coremem_types::mat::MBPgram::new(
self.0.curve.b_start,
self.0.curve.b_end,
self.0.curve.max_m,
);
// println!("step_b {}", target_b);
self.0.m = Vec3::new(
curve.move_b(self.0.m.x(), target_b.x()),
curve.move_b(self.0.m.y(), target_b.y()),
curve.move_b(self.0.m.z(), target_b.z()),
);
}
fn m(&self) -> Vec3<R> {
self.0.m()
}
}
// XXX: for debugging, use the same MBFerromagnet impl as we do in spirv impl.
// pub type MBFerromagnet<R> = SpirvMBFerromagnet<R>;
pub type MBFerromagnet<R> = NativeMBFerromagnet<R>;
#[cfg(test)]
mod test {
use super::*;
use float_eq::assert_float_eq;
#[test]
fn curve_interior() {
let curve = MBPgram::new(4.0, 6.0, 20.0f32);
assert_float_eq!(curve.move_b(0.0, 2.0), 0.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(0.0, 5.0), 0.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(1.0, 5.0), 1.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(20.0, 5.0), 20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(-20.0, 4.0), -20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(-20.0, -6.0), -20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(20.0, -4.0), 20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(10.0, -2.0), 10.0, abs <= 1e-5);
}
#[test]
fn curve_exterior() {
let curve = MBPgram::new(4.0, 6.0, 20.0f32);
assert_float_eq!(curve.move_b(0.0, 6.0), 20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(0.0, 7.0), 20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(0.0, -6.0), -20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(0.0, -7.0), -20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(2.0, -6.0), -20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(20.0, -6.0), -20.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(20.0, -5.0), 0.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(20.0, -4.5), 10.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(-15.0, 4.5), -10.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(-15.0, 5.0), 0.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(-15.0, 5.5), 10.0, abs <= 1e-5);
assert_float_eq!(curve.move_b(-15.0, 7.5), 20.0, abs <= 1e-5);
}
#[test]
fn curve_3r1() {
// slope of 3r1 is about M=793210*B
// This is almost identical to iron (795615)!
let curve = MBPgram::new(-0.3899, 0.3900, 310000f32);
// magnetizing:
// v.s. 198893 in B(H) curve
assert_float_eq!(curve.move_b(0.0, 0.250), 198703.0, abs <= 1.0);
// v.s. 278321 in B(H) curve
assert_float_eq!(curve.move_b(198703.0, 0.350), 278201.0, abs <= 1.0);
assert_float_eq!(curve.move_b(278201.0, 0.390), 310000.0, abs <= 1.0);
// de-magnetizing:
// From saturation, decreasing B causes NO decrease in M: instead, it causes a decrease in
// H. This is probably BAD: in the B(H) curve, a large change in H always causes a large
// change in B. The movement of H here is likely to induce current, whereas it SHOULDN'T.
assert_float_eq!(curve.move_b(310000.0, 0.38995), 310000.0, abs <= 1.0);
// v.s. 258626 in B(H); H = 220
assert_float_eq!(curve.move_b(310000.0, 0.325), 258406.0, abs <= 1.0);
// here's where H crosses 0 (v.s. B=0.325 in the B(H) curve... quite a difference)
assert_float_eq!(curve.move_b(310000.0, 0.050), 39788.438, abs <= 1.0);
// v.s. 35.0 in B(H)
assert_float_eq!(curve.move_b(310000.0, 0.0), 39.75, abs <= 1.0);
// negative magnetization:
// erase the magnetization: H = -40
assert_float_eq!(curve.move_b(310000.0, -0.00005), 0.0, abs <= 0.1);
// the magnetization has been completely erased:
assert_float_eq!(curve.move_b(310000.0, -0.25), -198703.0, abs <= 1.0);
}
}

View File

@@ -1,327 +0,0 @@
use crate::CellState;
use crate::geom::Vec3;
use crate::real::Real;
use crate::sim::{PmlParameters, PmlState, StepParameters, StepParametersMut};
use enum_dispatch::enum_dispatch;
use serde::{Serialize, Deserialize};
pub mod db;
mod bh_ferromagnet;
mod mb_ferromagnet;
mod linear;
pub use bh_ferromagnet::*;
pub use mb_ferromagnet::*;
pub use coremem_types::mat::{AnisomorphicConductor, Ferroxcube3R1MH, IsoConductorOr, IsomorphicConductor, MHPgram};
pub use linear::*;
#[enum_dispatch]
pub trait Material<R: Real> {
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
// by default, behave as a vacuum
StepParametersMut::default()
}
/// Return the magnetization.
fn m(&self) -> Vec3<R> {
Vec3::zero()
}
/// Called just before magnetic field is updated. Optionally change any internal state (e.g. magnetization).
fn step_b(&mut self, _context: &CellState<R>, _delta_b: Vec3<R>) {
}
}
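The default methods encode "behave as a vacuum", so an implementor only overrides what differs from vacuum. A minimal standalone sketch of that pattern (scalar stand-ins for `Vec3`/`StepParametersMut`; the `Mat`/`Vacuum`/`Conductor` names are hypothetical):

```rust
// sketch: trait defaults as vacuum behavior; implementors override selectively.
trait Mat {
    fn conductivity(&self) -> f64 { 0.0 }  // vacuum conducts nothing
    fn m(&self) -> f64 { 0.0 }             // vacuum holds no magnetization
    fn step_b(&mut self, _delta_b: f64) {} // vacuum has no state to evolve
}

struct Vacuum; // an empty impl inherits full vacuum behavior
impl Mat for Vacuum {}

struct Conductor(f64); // overrides only conductivity
impl Mat for Conductor {
    fn conductivity(&self) -> f64 { self.0 }
}

fn main() {
    assert_eq!(Vacuum.conductivity(), 0.0);
    assert_eq!(Conductor(5e7).conductivity(), 5e7);
    assert_eq!(Conductor(5e7).m(), 0.0); // the default still applies
}
```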
pub trait MaterialExt<R> {
fn step_parameters<'a>(&'a self) -> StepParameters<'a, R>;
fn conductivity(&self) -> Vec3<R>;
}
impl<R: Real, M: Material<R>> MaterialExt<R> for M {
fn step_parameters<'a>(&'a self) -> StepParameters<'a, R> {
unsafe { &mut *(self as *const M as *mut M) }.step_parameters_mut().into()
}
fn conductivity(&self) -> Vec3<R> {
self.step_parameters().conductivity()
}
}
/// Capable of capturing all field-related information about a material at any
/// snapshot moment-in-time. Useful for serializing state.
#[derive(Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct Static<R> {
pub conductivity: Vec3<R>,
// pub pml: Option<(PmlState, PmlParameters)>,
pub m: Vec3<R>,
}
impl<R: Real> Static<R> {
pub fn from_material<M: Material<R>>(m: &M) -> Self {
let p = m.step_parameters();
Self {
conductivity: p.conductivity(),
// pml: p.pml().map(|(s, p)| (*s, p)),
m: m.m(),
}
}
// pub fn from_pml(pseudo_conductivity: Vec3<flt::Real>) -> Self {
// Self::from_material(&Pml::new(pseudo_conductivity))
// }
}
impl<R: Real> Material<R> for Static<R> {
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
StepParametersMut::new(
self.conductivity,
None, // self.pml.as_mut().map(|(s, p)| (s, *p)),
)
}
fn m(&self) -> Vec3<R> {
self.m
}
}
impl<R: Real, T> From<T> for Static<R>
where T: Into<GenericMaterial<R>>
{
fn from(mat: T) -> Self {
let generic = mat.into();
Self::from_material(&generic)
}
}
#[derive(Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct Pml<R>(PmlState<R>, PmlParameters<R>);
impl<R: Real> Pml<R> {
pub fn new<R2: Real>(pseudo_conductivity: Vec3<R2>) -> Self {
Self(PmlState::new(), PmlParameters::new(pseudo_conductivity))
}
}
impl<R: Real> Material<R> for Pml<R> {
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
StepParametersMut::default().with_pml(&mut self.0, self.1)
}
}
// #[enum_dispatch(Material)]
#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub enum GenericMaterial<R> {
Conductor(AnisomorphicConductor<R>),
LinearMagnet(LinearMagnet<R>),
Pml(Pml<R>),
MBFerromagnet(MBFerromagnet<R>),
Ferroxcube3R1(Ferroxcube3R1<R>),
MinimalSquare(MinimalSquare<R>),
}
impl<R: Real> Default for GenericMaterial<R> {
fn default() -> Self {
Self::Conductor(Default::default())
}
}
impl<R> From<AnisomorphicConductor<R>> for GenericMaterial<R> {
fn from(inner: AnisomorphicConductor<R>) -> Self {
Self::Conductor(inner)
}
}
impl<R: Real, V: Real> From<IsomorphicConductor<V>> for GenericMaterial<R> {
fn from(inner: IsomorphicConductor<V>) -> Self {
let iso_r = IsomorphicConductor::new(inner.iso_conductivity().cast::<R>());
Self::Conductor(iso_r.into())
}
}
impl<R> From<LinearMagnet<R>> for GenericMaterial<R> {
fn from(inner: LinearMagnet<R>) -> Self {
Self::LinearMagnet(inner)
}
}
impl<R> From<Pml<R>> for GenericMaterial<R> {
fn from(inner: Pml<R>) -> Self {
Self::Pml(inner)
}
}
impl<R> From<MBFerromagnet<R>> for GenericMaterial<R> {
fn from(inner: MBFerromagnet<R>) -> Self {
Self::MBFerromagnet(inner)
}
}
impl<R> From<Ferroxcube3R1<R>> for GenericMaterial<R> {
fn from(inner: Ferroxcube3R1<R>) -> Self {
Self::Ferroxcube3R1(inner)
}
}
impl<R> From<MinimalSquare<R>> for GenericMaterial<R> {
fn from(inner: MinimalSquare<R>) -> Self {
Self::MinimalSquare(inner)
}
}
impl<R: Real> Material<R> for GenericMaterial<R> {
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
use GenericMaterial::*;
match self {
Conductor(inner) => inner.step_parameters_mut(),
LinearMagnet(inner) => inner.step_parameters_mut(),
Pml(inner) => inner.step_parameters_mut(),
MBFerromagnet(inner) => inner.step_parameters_mut(),
Ferroxcube3R1(inner) => inner.step_parameters_mut(),
MinimalSquare(inner) => inner.step_parameters_mut(),
}
}
/// Return the magnetization.
fn m(&self) -> Vec3<R> {
use GenericMaterial::*;
match self {
Conductor(inner) => inner.m(),
LinearMagnet(inner) => inner.m(),
Pml(inner) => inner.m(),
MBFerromagnet(inner) => inner.m(),
Ferroxcube3R1(inner) => Material::m(inner),
MinimalSquare(inner) => Material::m(inner),
}
}
/// Called just before magnetic field is updated. Optionally change any internal state (e.g. magnetization).
fn step_b(&mut self, context: &CellState<R>, delta_b: Vec3<R>) {
use GenericMaterial::*;
match self {
Conductor(inner) => inner.step_b(context, delta_b),
LinearMagnet(inner) => inner.step_b(context, delta_b),
Pml(inner) => inner.step_b(context, delta_b),
MBFerromagnet(inner) => inner.step_b(context, delta_b),
Ferroxcube3R1(inner) => inner.step_b(context, delta_b),
MinimalSquare(inner) => inner.step_b(context, delta_b),
}
}
}
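The hand-written `match` forwarding above is exactly what the commented-out `#[enum_dispatch]` attribute would otherwise generate. The pattern in miniature, as a standalone sketch with hypothetical `Shape`/`Area` names:

```rust
// manual enum dispatch: a wrapper enum implements the trait by forwarding
// each method to whichever variant it holds.
trait Area { fn area(&self) -> f64; }

struct Square(f64);
struct Rect(f64, f64);
impl Area for Square { fn area(&self) -> f64 { self.0 * self.0 } }
impl Area for Rect { fn area(&self) -> f64 { self.0 * self.1 } }

enum Shape {
    Square(Square),
    Rect(Rect),
}
// From impls let callers write `shape.into()`, as GenericMaterial's callers do
impl From<Square> for Shape { fn from(s: Square) -> Self { Shape::Square(s) } }
impl From<Rect> for Shape { fn from(r: Rect) -> Self { Shape::Rect(r) } }

impl Area for Shape {
    fn area(&self) -> f64 {
        match self {
            Shape::Square(inner) => inner.area(),
            Shape::Rect(inner) => inner.area(),
        }
    }
}

fn main() {
    let s: Shape = Square(3.0).into();
    assert_eq!(s.area(), 9.0);
}
```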
// #[enum_dispatch(Material)]
#[derive(Clone, Serialize, Deserialize)]
pub enum GenericMaterialNoPml<R> {
Conductor(AnisomorphicConductor<R>),
LinearMagnet(LinearMagnet<R>),
MBFerromagnet(MBFerromagnet<R>),
Ferroxcube3R1(Ferroxcube3R1<R>),
MinimalSquare(MinimalSquare<R>),
}
impl<R: Real> Default for GenericMaterialNoPml<R> {
fn default() -> Self {
AnisomorphicConductor::default().into()
}
}
impl<R> From<AnisomorphicConductor<R>> for GenericMaterialNoPml<R> {
fn from(inner: AnisomorphicConductor<R>) -> Self {
Self::Conductor(inner)
}
}
impl<R: Real> Material<R> for GenericMaterialNoPml<R> {
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
use GenericMaterialNoPml::*;
match self {
Conductor(inner) => inner.step_parameters_mut(),
LinearMagnet(inner) => inner.step_parameters_mut(),
MBFerromagnet(inner) => inner.step_parameters_mut(),
Ferroxcube3R1(inner) => inner.step_parameters_mut(),
MinimalSquare(inner) => inner.step_parameters_mut(),
}
}
/// Return the magnetization.
fn m(&self) -> Vec3<R> {
use GenericMaterialNoPml::*;
match self {
Conductor(inner) => inner.m(),
LinearMagnet(inner) => inner.m(),
MBFerromagnet(inner) => inner.m(),
Ferroxcube3R1(inner) => Material::m(inner),
MinimalSquare(inner) => Material::m(inner),
}
}
/// Called just before magnetic field is updated. Optionally change any internal state (e.g. magnetization).
fn step_b(&mut self, context: &CellState<R>, delta_b: Vec3<R>) {
use GenericMaterialNoPml::*;
match self {
Conductor(inner) => inner.step_b(context, delta_b),
LinearMagnet(inner) => inner.step_b(context, delta_b),
MBFerromagnet(inner) => inner.step_b(context, delta_b),
Ferroxcube3R1(inner) => inner.step_b(context, delta_b),
MinimalSquare(inner) => inner.step_b(context, delta_b),
}
}
}
/// Materials which have only 1 Vec3.
// #[enum_dispatch(Material)]
#[derive(Clone, Serialize, Deserialize)]
pub enum GenericMaterialOneField<R> {
Conductor(AnisomorphicConductor<R>),
Ferroxcube3R1(Ferroxcube3R1<R>),
MinimalSquare(MinimalSquare<R>),
}
impl<R: Real> Default for GenericMaterialOneField<R> {
fn default() -> Self {
AnisomorphicConductor::default().into()
}
}
impl<R> From<AnisomorphicConductor<R>> for GenericMaterialOneField<R> {
fn from(inner: AnisomorphicConductor<R>) -> Self {
Self::Conductor(inner)
}
}
impl<R: Real> Material<R> for GenericMaterialOneField<R> {
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
use GenericMaterialOneField::*;
match self {
Conductor(inner) => inner.step_parameters_mut(),
Ferroxcube3R1(inner) => inner.step_parameters_mut(),
MinimalSquare(inner) => inner.step_parameters_mut(),
}
}
/// Return the magnetization.
fn m(&self) -> Vec3<R> {
use GenericMaterialOneField::*;
match self {
Conductor(inner) => inner.m(),
Ferroxcube3R1(inner) => Material::m(inner),
MinimalSquare(inner) => Material::m(inner),
}
}
/// Called just before magnetic field is updated. Optionally change any internal state (e.g. magnetization).
fn step_b(&mut self, context: &CellState<R>, delta_b: Vec3<R>) {
use GenericMaterialOneField::*;
match self {
Conductor(inner) => inner.step_b(context, delta_b),
Ferroxcube3R1(inner) => inner.step_b(context, delta_b),
MinimalSquare(inner) => inner.step_b(context, delta_b),
}
}
}
// coremem_types adapters
impl<R: Real> Material<R> for AnisomorphicConductor<R> {
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
let c = coremem_types::mat::Material::conductivity(self);
StepParametersMut::default().with_conductivity(c)
}
}
impl<R: Real> Material<R> for IsomorphicConductor<R> {
fn step_parameters_mut<'a>(&'a mut self) -> StepParametersMut<'a, R> {
let c = coremem_types::mat::Material::conductivity(self);
StepParametersMut::default().with_conductivity(c)
}
}

View File

@@ -1,84 +1,217 @@
use crate::geom::{HasCrossSection, Meters, Region, Torus, WorldRegion};
use crate::real::{Real as _, ToFloat as _};
use crate::cross::vec::{Vec3, Vec3u};
use crate::sim::AbstractSim;
use serde::{Serialize, Deserialize};
use std::ops::AddAssign;
// TODO: do we really need both Send and Sync?
pub trait AbstractMeasurement<S>: Send + Sync {
fn key_value(&self, state: &S) -> Vec<Measurement>;
}
pub fn as_dyn_measurements<S, M: AbstractMeasurement<S>>(meas: &[M]) -> Vec<&dyn AbstractMeasurement<S>> {
meas.into_iter().map(|m| m as &dyn AbstractMeasurement<S>).collect()
}
/// combine several measurements
pub fn eval_multiple<S>(state: &S, meas: &[&dyn AbstractMeasurement<S>]) -> Vec<Measurement> {
meas.into_iter().flat_map(|m| m.key_value(state).into_iter()).collect()
}
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
pub enum MeasurementValue {
Field(Vec3<f32>),
Float(f32),
Int(u64),
Dim(Vec3u),
}
impl From<Vec3<f32>> for MeasurementValue {
fn from(v: Vec3<f32>) -> Self {
Self::Field(v)
}
}
impl From<f32> for MeasurementValue {
fn from(v: f32) -> Self {
Self::Float(v)
}
}
impl From<u64> for MeasurementValue {
fn from(v: u64) -> Self {
Self::Int(v)
}
}
impl From<Vec3u> for MeasurementValue {
fn from(v: Vec3u) -> Self {
Self::Dim(v)
}
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Measurement {
name: String,
value: MeasurementValue,
/// e.g. "A" for Amps
unit: String,
}
impl Measurement {
fn new<T: Into<MeasurementValue>>(name: &str, value: T, unit: &str) -> Self {
Self {
name: name.to_owned(),
value: value.into(),
unit: unit.to_owned(),
}
}
fn new_unitless<T: Into<MeasurementValue>>(name: &str, value: T) -> Self {
Self::new(name, value, "")
}
pub fn name(&self) -> &str {
&self.name
}
pub fn pretty_print(&self) -> String {
use MeasurementValue::*;
match self.value {
Field(v) => format!("{}{}", v, self.unit),
Float(f) => if self.unit != "" {
SiScale::format_short(f, &self.unit)
} else {
f.to_string()
},
Int(u) => format!("{}{}", u, self.unit),
Dim(v) => format!("{}x{}x{}{}", v.x(), v.y(), v.z(), self.unit),
}
}
/// format the Measurement in a way that could be parseable later.
/// one major use case for this is in dumping the type to a CSV.
pub fn machine_readable(&self) -> String {
use MeasurementValue::*;
match self.value {
Field(v) => format!("{},{},{}", v.x(), v.y(), v.z()),
Float(f) => f.to_string(),
Int(u) => u.to_string(),
Dim(v) => format!("{},{},{}", v.x(), v.y(), v.z()),
}
}
/// retrieve the float value of this measurement -- if it's of float type.
/// useful for tests
pub fn get_float(&self) -> Option<f32> {
match self.value {
MeasurementValue::Float(f) => Some(f),
_ => None,
}
}
}
impl<S> AbstractMeasurement<S> for Measurement {
fn key_value(&self, _state: &S) -> Vec<Measurement> {
vec![self.clone()]
}
}
enum SiScale {
Pico,
Nano,
Micro,
Milli,
Unit,
Kilo,
Mega,
Giga,
Tera,
}
impl SiScale {
fn for_value(v: f32) -> Self {
use SiScale::*;
match v.abs() {
v if v < 1e-12 => Unit,
v if v < 1e-9 => Pico,
v if v < 1e-6 => Nano,
v if v < 1e-3 => Micro,
v if v < 1e0 => Milli,
v if v < 1e3 => Unit,
v if v < 1e6 => Kilo,
v if v < 1e9 => Mega,
v if v < 1e12 => Giga,
v if v < 1e15 => Tera,
_ => Unit,
}
}
/// return the numerical scale of this prefix.
/// e.g. `scale(&Pico) -> 1e-12`
fn scale(&self) -> f32 {
use SiScale::*;
match *self {
Pico => 1e-12,
Nano => 1e-9,
Micro => 1e-6,
Milli => 1e-3,
Unit => 1.0,
Kilo => 1e3,
Mega => 1e6,
Giga => 1e9,
Tera => 1e12,
}
}
/// return the short string for this scale.
/// e.g. `shortcode(Pico) -> "p"`
fn shortcode(&self) -> &'static str {
use SiScale::*;
match *self {
Pico => "p",
Nano => "n",
Micro => "u",
Milli => "m",
Unit => "",
Kilo => "k",
Mega => "M",
Giga => "G",
Tera => "T",
}
}
/// format `v`, with the provided unit.
/// e.g. `format_short(1234.0, "A") -> "1.23 kA"`
fn format_short(v: f32, unit: &str) -> String {
let si = SiScale::for_value(v);
let scaled = v / si.scale();
format!("{:.2} {}{}", scaled, si.shortcode(), unit)
}
}
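The SI formatting above can be exercised standalone. A condensed sketch that collapses `for_value`/`scale`/`shortcode` into one function (same thresholds; sub-pico magnitudes are left unscaled, as in the original):

```rust
// condensed sketch of SiScale::format_short: pick a prefix from |v|, divide, format.
fn format_short(v: f32, unit: &str) -> String {
    let (scale, prefix) = match v.abs() {
        x if x < 1e-12 => (1.0, ""), // below pico: leave unscaled
        x if x < 1e-9 => (1e-12, "p"),
        x if x < 1e-6 => (1e-9, "n"),
        x if x < 1e-3 => (1e-6, "u"),
        x if x < 1e0 => (1e-3, "m"),
        x if x < 1e3 => (1.0, ""),
        x if x < 1e6 => (1e3, "k"),
        x if x < 1e9 => (1e6, "M"),
        x if x < 1e12 => (1e9, "G"),
        x if x < 1e15 => (1e12, "T"),
        _ => (1.0, ""),
    };
    format!("{:.2} {}{}", v / scale, prefix, unit)
}

fn main() {
    assert_eq!(format_short(1234.0, "A"), "1.23 kA");
    assert_eq!(format_short(0.05, "V"), "50.00 mV");
}
```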
#[derive(Clone, Serialize, Deserialize)]
pub struct Time;
impl<S: AbstractSim> AbstractMeasurement<S> for Time {
fn key_value(&self, state: &S) -> Vec<Measurement> {
vec![
Measurement::new_unitless("step", state.step_no()),
Measurement::new("time", state.time(), "s"),
]
}
}
#[derive(Clone, Serialize, Deserialize)]
pub struct Meta;
impl<S: AbstractSim> AbstractMeasurement<S> for Meta {
fn key_value(&self, state: &S) -> Vec<Measurement> {
vec![
Measurement::new_unitless("dim", state.size().0),
Measurement::new("feature_size", state.feature_size(), "m"),
]
}
}
#[derive(Clone, Serialize, Deserialize)]
pub struct Label(pub String);
impl Label {
pub fn new<S: Into<String>>(s: S) -> Self {
Self(s.into())
}
}
#[typetag::serde]
impl AbstractMeasurement for Label {
fn eval(&self, _state: &dyn SampleableSim) -> String {
self.0.clone()
}
fn key_value(&self, _state: &dyn SampleableSim) -> IndexMap<String, String> {
[
(self.0.clone(), self.0.clone()),
].into_iter().collect()
}
}
#[derive(Clone, Serialize, Deserialize)]
pub struct Volume {
name: String,
region: Box<dyn Region>,
@@ -92,29 +225,21 @@ impl Volume {
}
}
/// Returns the volume of the region, in units of um^3
fn data<S: AbstractSim>(&self, state: &S) -> f32 {
let feat_um = state.feature_size() as f64 * 1e6;
(state.volume_of_region(&*self.region) as f64 * feat_um * feat_um * feat_um) as f32
}
}
impl<S: AbstractSim> AbstractMeasurement<S> for Volume {
fn key_value(&self, state: &S) -> Vec<Measurement> {
vec![
Measurement::new(&format!("Vol({})", self.name), self.data(state), "um^3"),
]
}
}
#[derive(Clone, Serialize, Deserialize)]
pub struct Current {
name: String,
region: Box<dyn Region>,
@@ -127,7 +252,7 @@ impl Current {
region: Box::new(r)
}
}
fn data<S: AbstractSim>(&self, state: &S) -> (f32, Vec3<f32>) {
let FieldSample(volume, current_mag, current_vec) = state.map_sum_over_enumerated(&*self.region, |coord: Meters, _cell| {
let current = state.current(coord);
FieldSample(1, current.mag().cast(), current.cast())
@@ -138,6 +263,24 @@ impl Current {
}
}
// TODO: clean up these FieldSample types
#[derive(Default)]
struct TupleSum<T>(T);
impl<T0: Default + AddAssign, T1: Default + AddAssign> std::iter::Sum for TupleSum<(T0, T1)> {
fn sum<I>(iter: I) -> Self
where I: Iterator<Item = Self>
{
let mut s = Self::default();
for TupleSum((a0, a1)) in iter {
s.0.0 += a0;
s.0.1 += a1;
}
s
}
}
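`TupleSum` exists so `map_sum_over_enumerated` can fold two running totals in one pass through `Iterator::sum`. A self-contained copy of the wrapper with a usage example (assuming only std):

```rust
use std::ops::AddAssign;

// self-contained copy of the TupleSum wrapper: component-wise Sum over pairs.
#[derive(Default)]
struct TupleSum<T>(T);

impl<T0: Default + AddAssign, T1: Default + AddAssign> std::iter::Sum for TupleSum<(T0, T1)> {
    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
        let mut s = Self::default();
        for TupleSum((a0, a1)) in iter {
            s.0.0 += a0;
            s.0.1 += a1;
        }
        s
    }
}

fn main() {
    // accumulate a running (total, count) in one pass, as CurrentLoop::data does
    let TupleSum((total, count)): TupleSum<(f64, u32)> =
        [1.5, 2.5, 4.0].into_iter().map(|x| TupleSum((x, 1u32))).sum();
    assert_eq!(total, 8.0);
    assert_eq!(count, 3);
}
```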
#[derive(Default)]
struct FieldSample(u32, f64, Vec3<f64>);
@@ -183,65 +326,79 @@ impl std::iter::Sum for FieldSamples<[FieldSample; 3]> {
}
}
impl<S: AbstractSim> AbstractMeasurement<S> for Current {
fn key_value(&self, state: &S) -> Vec<Measurement> {
let (mean_current_mag, mean_current_vec) = self.data(state);
vec![
Measurement::new(
&format!("Imag/cell({})", self.name),
mean_current_mag,
"A",
),
Measurement::new(
&format!("I/cell({})", self.name),
mean_current_vec,
"A",
),
]
}
}
/// Measures the current directed around a closed loop
#[derive(Clone, Serialize, Deserialize)]
pub struct CurrentLoop<R> {
name: String,
region: R,
}
impl<R> CurrentLoop<R> {
pub fn new(name: &str, r: R) -> Self {
Self {
name: name.into(),
region: r,
}
}
}
impl<R: Region + HasCrossSection> CurrentLoop<R> {
fn data<S: AbstractSim>(&self, state: &S) -> f32 {
// - current exists as a property of a 2d surface.
// - the user has provided us a 3d volume which behaves as though it's an extruded surface:
//   for any point in the volume we can query the normal vector of the cross section
//   containing that point.
// - we choose that measuring the "current" on such a volume means to measure the average
//   current through all its cross sections (for most boring materials, each
//   cross section has nearly identical current).
// - therefore, enumerate the entire volume and compute the "net" current (the sum over
//   each cell of whatever current in that cell is along the cross-section normal).
//   then divide by the number of complete cross sections we measured, to average.
let feature_area = state.feature_size() * state.feature_size();
let TupleSum((net_current, cross_sections)) = state.map_sum_over_enumerated(&self.region, move |coord: Meters, _cell| {
// `normal` represents both the size of the cross section (m^2) this cell belongs to,
// and the normal direction of the cross section.
let normal = self.region.cross_section_normal(coord); // [m^2]
// calculate the amount of normal current through this specific cell
let current_density = state.current_density(coord); // [A/m^2]
let cross_sectional_current = feature_area * current_density.dot(normal.norm()); // [A]
// keep track of how many cross sections we enumerate, since each additional cross
// section represents a double-count of the current.
let num_cross_sections_filled = feature_area / normal.mag();
TupleSum((cross_sectional_current, num_cross_sections_filled))
});
let mean_cross_sectional_current = net_current.cast::<f32>() / cross_sections;
mean_cross_sectional_current
}
}
impl<R: Region + HasCrossSection, S: AbstractSim> AbstractMeasurement<S> for CurrentLoop<R> {
fn key_value(&self, state: &S) -> Vec<Measurement> {
let cross_sectional_current = self.data(state);
vec![
Measurement::new(
&format!("I({})", self.name),
cross_sectional_current,
"A",
),
]
}
}
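The cross-section averaging described in the comments can be checked with a toy wire (hypothetical function and numbers; the real code queries per-cell values from the simulation state):

```rust
// toy model of CurrentLoop::data: a straight wire split into cross sections of
// equal cells, with uniform current density j normal to every cross section.
fn mean_cross_sectional_current(n_sections: u32, cells_per_section: u32, cell_area: f64, j: f64) -> f64 {
    let section_area = cells_per_section as f64 * cell_area; // |normal| for every cell
    let mut net_current = 0.0;
    let mut cross_sections = 0.0;
    for _ in 0..(n_sections * cells_per_section) {
        net_current += cell_area * j;               // [A] through this cell
        cross_sections += cell_area / section_area; // fraction of one full section
    }
    // each section counts the full loop current once; divide to undo the multi-count
    net_current / cross_sections
}

fn main() {
    // 3 sections x 4 cells of 0.25 m^2, j = 2 A/m^2: expect j * 1 m^2 = 2 A
    let i = mean_cross_sectional_current(3, 4, 0.25, 2.0);
    assert!((i - 2.0).abs() < 1e-9);
}
```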
@@ -260,7 +417,7 @@ impl MagneticLoop {
region: r,
}
}
fn data<S: AbstractSim>(&self, state: &S) -> (f32, f32, f32) {
let FieldSamples([
FieldSample(volume, directed_m, _m_vec),
FieldSample(_, directed_b, _b_vec),
@@ -300,29 +457,18 @@ impl MagneticLoop {
}
}
impl<S: AbstractSim> AbstractMeasurement<S> for MagneticLoop {
fn key_value(&self, state: &S) -> Vec<Measurement> {
let (mean_directed_m, mean_directed_b, mean_directed_h) = self.data(state);
vec![
Measurement::new_unitless(&format!("M({})", self.name), mean_directed_m),
Measurement::new_unitless(&format!("B({})", self.name), mean_directed_b),
Measurement::new_unitless(&format!("H({})", self.name), mean_directed_h),
]
}
}
/// mean M over a region /// mean M over a region
#[derive(Clone, Serialize, Deserialize)]
pub struct MagneticFlux { pub struct MagneticFlux {
name: String, name: String,
region: Box<dyn Region>, region: Box<dyn Region>,
@@ -335,7 +481,7 @@ impl MagneticFlux {
             region: Box::new(r)
         }
     }
-    fn data(&self, state: &dyn SampleableSim) -> Vec3<f32> {
+    fn data<S: AbstractSim>(&self, state: &S) -> Vec3<f32> {
         let FieldSample(volume, _directed_mag, mag_vec) = state.map_sum_over(&*self.region, |cell| {
             let b = cell.b();
             let mag = b.mag();
@@ -346,22 +492,19 @@ impl MagneticFlux {
         }
     }

-#[typetag::serde]
-impl AbstractMeasurement for MagneticFlux {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
-        let mean_mag = self.data(state);
-        format!("Bavg({}): {:.2e}", self.name, mean_mag)
-    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
-        let mean_mag = self.data(state);
-        [
-            (format!("Bavg({})", self.name), mean_mag.to_string()),
-        ].into_iter().collect()
-    }
-}
+impl<S: AbstractSim> AbstractMeasurement<S> for MagneticFlux {
+    fn key_value(&self, state: &S) -> Vec<Measurement> {
+        let mean_mag = self.data(state);
+        vec![
+            Measurement::new_unitless(
+                &format!("Bavg({})", self.name),
+                mean_mag,
+            )
+        ]
+    }
+}

 /// mean M over a region
+#[derive(Clone, Serialize, Deserialize)]
 pub struct Magnetization {
     name: String,
     region: Box<dyn Region>,
@@ -374,7 +517,7 @@ impl Magnetization {
             region: Box::new(r)
         }
     }
-    fn data(&self, state: &dyn SampleableSim) -> Vec3<f32> {
+    fn data<S: AbstractSim>(&self, state: &S) -> Vec3<f32> {
         let FieldSample(volume, _directed_mag, mag_vec) = state.map_sum_over(&*self.region, |cell| {
             let m = cell.m();
             let mag = m.mag();
@@ -385,17 +528,14 @@ impl Magnetization {
         }
     }

-#[typetag::serde]
-impl AbstractMeasurement for Magnetization {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
-        let mean_mag = self.data(state);
-        format!("Mavg({}): {:.2e}", self.name, mean_mag)
-    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
-        let mean_mag = self.data(state);
-        [
-            (format!("Mavg({})", self.name), mean_mag.to_string()),
-        ].into_iter().collect()
-    }
-}
+impl<S: AbstractSim> AbstractMeasurement<S> for Magnetization {
+    fn key_value(&self, state: &S) -> Vec<Measurement> {
+        let mean_mag = self.data(state);
+        vec![
+            Measurement::new_unitless(
+                &format!("Mavg({})", self.name), mean_mag
+            ),
+        ]
+    }
+}
@@ -407,17 +547,12 @@ fn loc(v: Meters) -> String {
 #[derive(Clone, Serialize, Deserialize)]
 pub struct MagnetizationAt(pub Meters);

-#[typetag::serde]
-impl AbstractMeasurement for MagnetizationAt {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
-        let m = state.sample(self.0).m();
-        format!("M{}: {:.2e}", loc(self.0), m)
-    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
-        let m = state.sample(self.0).m();
-        [
-            (format!("M{}", loc(self.0)), m.to_string()),
-        ].into_iter().collect()
-    }
-}
+impl<S: AbstractSim> AbstractMeasurement<S> for MagnetizationAt {
+    fn key_value(&self, state: &S) -> Vec<Measurement> {
+        let m = state.sample(self.0).m();
+        vec![
+            Measurement::new_unitless(&format!("M{}", loc(self.0)), m.cast())
+        ]
+    }
+}
@@ -425,17 +560,14 @@ impl AbstractMeasurement for MagnetizationAt {
 #[derive(Clone, Serialize, Deserialize)]
 pub struct MagneticFluxAt(pub Meters);

-#[typetag::serde]
-impl AbstractMeasurement for MagneticFluxAt {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
-        let b = state.sample(self.0).b();
-        format!("B{}: {:.2e}", loc(self.0), b)
-    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
-        let b = state.sample(self.0).b();
-        [
-            (format!("B{}", loc(self.0)), b.to_string()),
-        ].into_iter().collect()
-    }
-}
+impl<S: AbstractSim> AbstractMeasurement<S> for MagneticFluxAt {
+    fn key_value(&self, state: &S) -> Vec<Measurement> {
+        let b = state.sample(self.0).b();
+        vec![
+            Measurement::new_unitless(
+                &format!("B{}", loc(self.0)), b.cast()
+            )
+        ]
+    }
+}
@@ -443,38 +575,31 @@ impl AbstractMeasurement for MagneticFluxAt {
 #[derive(Clone, Serialize, Deserialize)]
 pub struct MagneticStrengthAt(pub Meters);

-#[typetag::serde]
-impl AbstractMeasurement for MagneticStrengthAt {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
-        let h = state.sample(self.0).h();
-        format!("H{}: {:.2e}", loc(self.0), h)
-    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
-        let h = state.sample(self.0).h();
-        [
-            (format!("H{}", loc(self.0)), h.to_string()),
-        ].into_iter().collect()
-    }
-}
+impl<S: AbstractSim> AbstractMeasurement<S> for MagneticStrengthAt {
+    fn key_value(&self, state: &S) -> Vec<Measurement> {
+        let h = state.sample(self.0).h();
+        vec![
+            Measurement::new_unitless(
+                &format!("H{}", loc(self.0)), h.cast()
+            )
+        ]
+    }
+}

 #[derive(Clone, Serialize, Deserialize)]
 pub struct ElectricField(pub Meters);

-#[typetag::serde]
-impl AbstractMeasurement for ElectricField {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
-        let e = state.sample(self.0).e();
-        format!("E{}: {:.2e}", loc(self.0), e)
-    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
-        let e = state.sample(self.0).e();
-        [
-            (format!("E{}", loc(self.0)), e.to_string()),
-        ].into_iter().collect()
-    }
-}
+impl<S: AbstractSim> AbstractMeasurement<S> for ElectricField {
+    fn key_value(&self, state: &S) -> Vec<Measurement> {
+        let e = state.sample(self.0).e();
+        vec![
+            Measurement::new_unitless(
+                &format!("E{}", loc(self.0)), e.cast()
+            )
+        ]
+    }
+}

+#[derive(Clone, Serialize, Deserialize)]
 pub struct Energy {
     name: String,
     region: Box<dyn Region>,
@@ -490,7 +615,7 @@ impl Energy {
             region: Box::new(region),
         }
     }
-    fn data(&self, state: &dyn SampleableSim) -> f32 {
+    pub(crate) fn data<S: AbstractSim>(&self, state: &S) -> f32 {
         // Potential energy stored in a E/M field:
         // https://en.wikipedia.org/wiki/Magnetic_energy
         // https://en.wikipedia.org/wiki/Electric_potential_energy#Energy_stored_in_an_electrostatic_field_distribution
@@ -507,21 +632,17 @@ impl Energy {
         }
     }

-#[typetag::serde]
-impl AbstractMeasurement for Energy {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
-        let e = self.data(state);
-        format!("U({}): {:.2e}", self.name, e)
-    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
-        let e = self.data(state);
-        [
-            (format!("U({})", self.name), e.to_string()),
-        ].into_iter().collect()
-    }
-}
+impl<S: AbstractSim> AbstractMeasurement<S> for Energy {
+    fn key_value(&self, state: &S) -> Vec<Measurement> {
+        let e = self.data(state);
+        vec![
+            Measurement::new(
+                &format!("U({})", self.name), e, "J"
+            )
+        ]
+    }
+}

+#[derive(Clone, Serialize, Deserialize)]
 pub struct Power {
     name: String,
     region: Box<dyn Region>
@@ -537,7 +658,7 @@ impl Power {
             region: Box::new(region),
         }
     }
-    fn data(&self, state: &dyn SampleableSim) -> f32 {
+    fn data<S: AbstractSim>(&self, state: &S) -> f32 {
         // Power is P = IV = A*J*V = L^2*J.(LE) = L^3 J.E
         // where L is feature size.
         #[allow(non_snake_case)]
@@ -549,16 +670,186 @@ impl Power {
         }
     }

-#[typetag::serde]
-impl AbstractMeasurement for Power {
-    fn eval(&self, state: &dyn SampleableSim) -> String {
-        let power = self.data(state);
-        format!("P({}): {:.2e}", self.name, power)
-    }
-    fn key_value(&self, state: &dyn SampleableSim) -> IndexMap<String, String> {
-        let power = self.data(state);
-        [
-            (format!("P({})", self.name), power.to_string()),
-        ].into_iter().collect()
-    }
-}
+impl<S: AbstractSim> AbstractMeasurement<S> for Power {
+    fn key_value(&self, state: &S) -> Vec<Measurement> {
+        let power = self.data(state);
+        vec![
+            Measurement::new(
+                &format!("P({})", self.name), power, "W"
+            )
+        ]
+    }
+}
#[cfg(test)]
pub mod test {
use super::*;
use crate::cross::mat::AnisomorphicConductor;
use crate::cross::step::SimMeta;
use crate::geom::Index;
use crate::sim::{Fields, GenericSim};
use crate::stim::Stimulus;
struct MockSim {
e_field: Vec3<f32>,
dim: Vec3u,
feature_size: f32,
mat: AnisomorphicConductor<f32>,
}
impl AbstractSim for MockSim {
type Real = f32;
type Material = AnisomorphicConductor<f32>;
fn meta(&self) -> SimMeta<f32> {
SimMeta::new(self.dim, self.feature_size, 1e-9)
}
fn step_no(&self) -> u64 {
unimplemented!()
}
fn fields_at_index(&self, _pos: Index) -> Fields<Self::Real> {
Fields::new(self.e_field, Vec3::zero(), Vec3::zero())
}
fn get_material_index(&self, _at: Index) -> &Self::Material {
&self.mat
}
fn put_material_index(&mut self, _at: Index, _m: Self::Material) {
unimplemented!()
}
fn step_multiple<S: Stimulus<f32>>(&mut self, _num_steps: u32, _s: &S) {
unimplemented!()
}
fn to_generic(&self) -> GenericSim<Self::Real> {
unimplemented!()
}
}
struct MockRegion {
normal: Vec3<f32>,
}
impl HasCrossSection for MockRegion {
fn cross_section_normal(&self, _p: Meters) -> Vec3<f32> {
self.normal
}
}
impl Region for MockRegion {
fn contains(&self, _p: Meters) -> bool {
true
}
}
#[test]
fn current_loop_trivial() {
let sim = MockSim {
e_field: Vec3::new(1.0, 0.0, 0.0),
dim: Vec3u::new(1, 1, 1),
feature_size: 1.0,
mat: AnisomorphicConductor::new(Vec3::new(1.0, 1.0, 1.0)),
};
let region = MockRegion {
normal: Vec3::new(1.0, 0.0, 0.0),
};
let kv = CurrentLoop::new("test", region).key_value(&sim);
assert_eq!(kv.len(), 1);
// measured area is 1 m^2
// region cross-section is 1 m^2
// conductivity is 1 S/m
assert_eq!(kv[0].get_float().unwrap(), 1.0);
}
#[test]
fn current_loop_multi_cell() {
let sim = MockSim {
e_field: Vec3::new(1.0, 0.0, 0.0),
dim: Vec3u::new(4, 4, 4),
feature_size: 0.25,
mat: AnisomorphicConductor::new(Vec3::new(1.0, 1.0, 1.0)),
};
let region = MockRegion {
normal: Vec3::new(1.0, 0.0, 0.0),
};
let kv = CurrentLoop::new("test", region).key_value(&sim);
assert_eq!(kv.len(), 1);
// measured area is 1 m^2
// region cross-section is 1 m^2
// conductivity is 1 S/m
assert_eq!(kv[0].get_float().unwrap(), 1.0);
}
#[test]
fn current_loop_off_conductor() {
let sim = MockSim {
e_field: Vec3::new(1.0, 1.0, 1.0),
dim: Vec3u::new(4, 4, 4),
feature_size: 0.25,
mat: AnisomorphicConductor::new(Vec3::new(0.0, 1.0, 1.0)),
};
let region = MockRegion {
normal: Vec3::new(1.0, 0.0, 0.0),
};
let kv = CurrentLoop::new("test", region).key_value(&sim);
assert_eq!(kv.len(), 1);
// material is not conductive in the direction being queried
assert_eq!(kv[0].get_float().unwrap(), 0.0);
}
#[test]
fn current_loop_e_field() {
let sim = MockSim {
e_field: Vec3::new(4.0, 2.0, 1.0),
dim: Vec3u::new(4, 4, 4),
feature_size: 0.25,
mat: AnisomorphicConductor::new(Vec3::new(1.0, 1.0, 1.0)),
};
let region = MockRegion {
normal: Vec3::new(1.0, 0.0, 0.0),
};
let kv = CurrentLoop::new("test", region).key_value(&sim);
assert_eq!(kv.len(), 1);
// measured area is 1 m^2
// region cross-section is 1 m^2
// conductivity is 1 S/m
// e field is 4 V/m
assert_eq!(kv[0].get_float().unwrap(), 4.0);
}
#[test]
fn current_loop_conductivity() {
let sim = MockSim {
e_field: Vec3::new(4.0, 2.0, 1.0),
dim: Vec3u::new(4, 4, 4),
feature_size: 0.25,
mat: AnisomorphicConductor::new(Vec3::new(3.0, 1.0, 1.0)),
};
let region = MockRegion {
normal: Vec3::new(1.0, 0.0, 0.0),
};
let kv = CurrentLoop::new("test", region).key_value(&sim);
assert_eq!(kv.len(), 1);
// measured area is 1 m^2
// region cross-section is 1 m^2
// conductivity is 3 S/m
// e field is 4 V/m
assert_eq!(kv[0].get_float().unwrap(), 3.0*4.0);
}
#[test]
fn current_loop_cross_section() {
let sim = MockSim {
e_field: Vec3::new(4.0, 2.0, 1.0),
dim: Vec3u::new(4, 4, 4),
feature_size: 0.5,
mat: AnisomorphicConductor::new(Vec3::new(3.0, 1.0, 1.0)),
};
let region = MockRegion {
normal: Vec3::new(16.0, 0.0, 0.0),
};
let kv = CurrentLoop::new("test", region).key_value(&sim);
assert_eq!(kv.len(), 1);
// measured area is 2 m^2
// region cross-section is 16 m^2
// conductivity is 3 S/m
// e field is 4 V/m
assert_eq!(kv[0].get_float().unwrap(), 3.0*4.0*16.0);
     }
 }


@@ -1,7 +1,8 @@
-use crate::geom::{Meters, Vec2, Vec3};
+use crate::geom::Index;
 use crate::real::ToFloat as _;
-use crate::sim::{SampleableSim, Sample, StaticSim};
-use crate::meas::{self, AbstractMeasurement};
+use crate::cross::vec::{Vec2, Vec3};
+use crate::sim::{AbstractSim, GenericSim, Sample};
+use crate::meas::{self, AbstractMeasurement, Measurement};
 use crossterm::{cursor, QueueableCommand as _};
 use crossterm::style::{style, Color, PrintStyledContent, Stylize as _};
 use font8x8::{BASIC_FONTS, GREEK_FONTS, UnicodeFonts as _};
@@ -11,6 +12,8 @@ use image::{RgbImage, Rgb};
 use imageproc::{pixelops, drawing};
 use rayon::prelude::*;
 use serde::{Serialize, Deserialize};
+use std::collections::hash_map::DefaultHasher;
+use std::hash::Hasher;
 use std::fs::{File, OpenOptions};
 use std::io::{BufReader, BufWriter, Seek as _, SeekFrom, Write as _};
 use std::path::{Path, PathBuf};
@@ -51,10 +54,10 @@ fn scale_unsigned_to_u8(x: f32, typ: f32) -> u8 {
 /// Scale a vector to have magnitude between [0, 1).
 fn scale_vector(x: Vec2<f32>, typical_mag: f32) -> Vec2<f32> {
     let new_mag = scale_unsigned(x.mag(), typical_mag);
-    x.with_mag(new_mag)
+    x.with_mag(new_mag).unwrap_or_default()
 }

-fn im_size<S: SampleableSim>(state: &S, max_w: u32, max_h: u32) -> (u32, u32) {
+fn im_size<S: AbstractSim>(state: &S, max_w: u32, max_h: u32) -> (u32, u32) {
     let mut width = max_w;
     let mut height = width * state.height() / state.width();
     if height > max_h {
@@ -71,6 +74,7 @@ pub enum FieldDisplayMode {
     EzBxy,
     BCurrent,
     M,
+    Material,
 }

 impl FieldDisplayMode {
@@ -80,17 +84,19 @@ impl FieldDisplayMode {
             BzExy => EzBxy,
             EzBxy => BCurrent,
             BCurrent => M,
-            M => BzExy,
+            M => Material,
+            Material => BzExy,
         }
     }
     pub fn prev(self) -> Self {
         use FieldDisplayMode::*;
         match self {
-            BzExy => M,
+            BzExy => Material,
             EzBxy => BzExy,
             BCurrent => EzBxy,
             M => BCurrent,
+            Material => M,
         }
     }
 }
@@ -128,20 +134,22 @@ impl RenderConfig {
 struct RenderSteps<'a, S> {
     im: RgbImage,
     sim: &'a S,
-    meas: &'a [Box<dyn AbstractMeasurement>],
+    meas: &'a [&'a dyn AbstractMeasurement<S>],
     /// Simulation z coordinate to sample
     z: u32,
 }

-impl<'a, S: SampleableSim> RenderSteps<'a, S> {
+impl<'a, S: AbstractSim> RenderSteps<'a, S> {
+    // TODO: this could probably be a single measurement, and we just let collections of
+    // measurements also behave as measurements
     /// Render using default configuration constants
-    fn render(state: &'a S, measurements: &'a [Box<dyn AbstractMeasurement>], z: u32) -> RgbImage {
+    fn render(state: &'a S, measurements: &'a [&'a dyn AbstractMeasurement<S>], z: u32) -> RgbImage {
         Self::render_configured(state, measurements, z, (640, 480), RenderConfig::default())
     }
     /// Render, controlling things like the size.
     fn render_configured(
         state: &'a S,
-        measurements: &'a [Box<dyn AbstractMeasurement>],
+        measurements: &'a [&'a dyn AbstractMeasurement<S>],
         z: u32,
         max_size: (u32, u32),
         config: RenderConfig,
@@ -173,11 +181,14 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
             FieldDisplayMode::M => {
                 me.render_m(config.scale);
             }
+            FieldDisplayMode::Material => {
+                me.render_mat(config.scale);
+            }
         }
         me.render_measurements();
         me.im
     }
-    fn new(sim: &'a S, meas: &'a [Box<dyn AbstractMeasurement>], width: u32, height: u32, z: u32) -> Self {
+    fn new(sim: &'a S, meas: &'a [&'a dyn AbstractMeasurement<S>], width: u32, height: u32, z: u32) -> Self {
         RenderSteps {
             im: RgbImage::new(width, height),
             sim,
@@ -186,13 +197,10 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
         }
     }
-    fn get_at_px(&self, x_px: u32, y_px: u32) -> Sample {
-        let x_prop = x_px as f32 / self.im.width() as f32;
-        let x_m = x_prop * (self.sim.width() as f32 * self.sim.feature_size() as f32);
-        let y_prop = y_px as f32 / self.im.height() as f32;
-        let y_m = y_prop * (self.sim.height() as f32 * self.sim.feature_size() as f32);
-        let z_m = self.z as f32 * self.sim.feature_size() as f32;
-        self.sim.sample(Meters(Vec3::new(x_m, y_m, z_m)))
+    fn get_at_px<'b>(&'b self, x_px: u32, y_px: u32) -> Sample<'b, S::Real, S::Material> {
+        let x_idx = x_px * self.sim.width() / self.im.width();
+        let y_idx = y_px * self.sim.height() / self.im.height();
+        self.sim.sample(Index::new(x_idx, y_idx, self.z))
     }

     ////////////// Ex/Ey/Bz configuration ////////////
@@ -229,7 +237,22 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
         self.render_vector_field(Rgb([0xff, 0xff, 0xff]), 1.0e5 * scale, |cell| cell.m().xy().to_f32());
     }

-    fn render_vector_field<F: Fn(&Sample) -> Vec2<f32>>(&mut self, color: Rgb<u8>, typical: f32, measure: F) {
+    fn render_mat(&mut self, scale: f32) {
+        unsafe fn to_bytes<T>(d: &T) -> &[u8] {
+            std::slice::from_raw_parts(d as *const T as *const u8, std::mem::size_of::<T>())
+        }
+        self.render_scalar_field(scale, false, 1, |cell| {
+            let mut hasher = DefaultHasher::new();
+            let as_bytes = unsafe { to_bytes(cell.material()) };
+            std::hash::Hash::hash_slice(as_bytes, &mut hasher);
+            hasher.finish() as f32 / (-1i64 as u64 as f32)
+        });
+    }
+
+    fn render_vector_field<F>(&mut self, color: Rgb<u8>, typical: f32, measure: F)
+    where
+        F: Fn(&Sample<'_, S::Real, S::Material>) -> Vec2<f32>
+    {
         let w = self.im.width();
         let h = self.im.height();
         let vec_spacing = 10;
@@ -244,7 +267,10 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
             }
         }
     }
-    fn render_scalar_field<F: Fn(&Sample) -> f32 + Sync>(&mut self, typical: f32, signed: bool, slot: u32, measure: F) {
+    fn render_scalar_field<F>(&mut self, typical: f32, signed: bool, slot: u32, measure: F)
+    where
+        F: Fn(&Sample<'_, S::Real, S::Material>) -> f32 + Sync
+    {
         // XXX: get_at_px borrows self, so we need to clone the image to operate on it mutably.
         let mut im = self.im.clone();
         let w = im.width();
@@ -268,8 +294,8 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
         self.im = im;
     }
     fn render_measurements(&mut self) {
-        for (meas_no, m) in self.meas.iter().enumerate() {
-            let meas_string = m.eval(self.sim);
+        for (meas_no, m) in meas::eval_multiple(self.sim, &self.meas).into_iter().enumerate() {
+            let meas_string = m.pretty_print();
             for (i, c) in meas_string.chars().enumerate() {
                 let glyph = BASIC_FONTS.get(c)
                     .or_else(|| GREEK_FONTS.get(c))
@@ -290,7 +316,10 @@ impl<'a, S: SampleableSim> RenderSteps<'a, S> {
             }
         }
     }
-    fn field_vector<F: Fn(&Sample) -> Vec2<f32>>(&self, xidx: u32, yidx: u32, size: u32, measure: &F) -> Vec2<f32> {
+    fn field_vector<F>(&self, xidx: u32, yidx: u32, size: u32, measure: &F) -> Vec2<f32>
+    where
+        F: Fn(&Sample<'_, S::Real, S::Material>) -> Vec2<f32>
+    {
         let mut field = Vec2::default();
         let w = self.im.width();
         let h = self.im.height();
@@ -339,25 +368,25 @@ impl ImageRenderExt for RgbImage {
 }

 pub trait Renderer<S>: Send + Sync {
-    fn render_z_slice(&self, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig);
+    fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig);
     // {
     //     self.render_with_image(state, &RenderSteps::render(state, measurements, z), measurements);
     // }
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig);
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig);
     /// Not intended to be called directly by users; implement this if you want the image to be
     /// computed using default settings and you just manage where to display/save it.
-    fn render_with_image(&self, state: &S, _im: &RgbImage, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+    fn render_with_image(&self, state: &S, _im: &RgbImage, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
         self.render(state, measurements, config);
     }
 }

-fn default_render_z_slice<S: SampleableSim, R: Renderer<S>>(
-    me: &R, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig,
+fn default_render_z_slice<S: AbstractSim, R: Renderer<S>>(
+    me: &R, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig,
 ) {
     me.render_with_image(state, &RenderSteps::render(state, measurements, z), measurements, config);
 }

-fn default_render<S: SampleableSim, R: Renderer<S>>(
-    me: &R, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig
+fn default_render<S: AbstractSim, R: Renderer<S>>(
+    me: &R, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig
 ) {
     me.render_z_slice(state, state.depth() / 2, measurements, config);
 }
@@ -365,7 +394,7 @@ fn default_render<S: SampleableSim, R: Renderer<S>>(
 // pub struct NumericTermRenderer;
 //
 // impl Renderer for NumericTermRenderer {
-//     fn render(&mut self, state: &SimSnapshot, _measurements: &[Box<dyn AbstractMeasurement>]) {
+//     fn render(&mut self, state: &SimSnapshot, _measurements: &[&dyn AbstractMeasurement<S>]) {
 //         for y in 0..state.height() {
 //             for x in 0..state.width() {
 //                 let cell = state.get((x, y).into());
@@ -385,17 +414,18 @@ fn default_render<S: SampleableSim, R: Renderer<S>>(
 #[derive(Default)]
 pub struct ColorTermRenderer;

-impl<S: SampleableSim> Renderer<S> for ColorTermRenderer {
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+impl<S: AbstractSim> Renderer<S> for ColorTermRenderer {
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
         default_render(self, state, measurements, config)
     }
     fn render_z_slice(
         &self,
         state: &S,
         z: u32,
-        measurements: &[Box<dyn AbstractMeasurement>],
+        measurements: &[&dyn AbstractMeasurement<S>],
         config: RenderConfig,
     ) {
+        let measurements = meas::eval_multiple(state, measurements);
         let (max_w, mut max_h) = crossterm::terminal::size().unwrap();
         max_h = max_h.saturating_sub(2 + measurements.len() as u16);
         let im = RenderSteps::render_configured(state, &[], z, (max_w as _, max_h as _), config);
@@ -424,7 +454,7 @@ impl<S: SampleableSim> Renderer<S> for ColorTermRenderer {
         for m in measurements {
             // Measurements can be slow to compute
             stdout.flush().unwrap();
-            let meas_string = m.eval(state);
+            let meas_string = format!("{}: \t{}", m.name(), m.pretty_print());
             stdout.queue(cursor::MoveDown(1)).unwrap();
             stdout.queue(cursor::MoveToColumn(1)).unwrap();
             stdout.queue(PrintStyledContent(style(meas_string))).unwrap();
@@ -447,14 +477,14 @@ impl Y4MRenderer {
     }
 }

-impl<S: SampleableSim> Renderer<S> for Y4MRenderer {
-    fn render_z_slice(&self, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+impl<S: AbstractSim> Renderer<S> for Y4MRenderer {
+    fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
         default_render_z_slice(self, state, z, measurements, config)
     }
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
         default_render(self, state, measurements, config)
     }
-    fn render_with_image(&self, _state: &S, im: &RgbImage, _meas: &[Box<dyn AbstractMeasurement>], _config: RenderConfig) {
+    fn render_with_image(&self, _state: &S, im: &RgbImage, _meas: &[&dyn AbstractMeasurement<S>], _config: RenderConfig) {
         {
             let mut enc = self.encoder.lock().unwrap();
             if enc.is_none() {
@@ -505,6 +535,14 @@ impl<S> MultiRendererElement<S> {
             Some(end) => frame < end,
         }
     }
fn next_frame_for_work(&self, after: u64) -> Option<u64> {
let max_frame = after + self.step_frequency;
let max_frame = max_frame - max_frame % self.step_frequency;
match self.step_limit {
None => Some(max_frame),
Some(end) => Some(max_frame).filter(|&f| f < end)
}
}
 }

 pub struct MultiRenderer<S> {
@@ -537,19 +575,22 @@ impl<S> MultiRenderer<S> {
     pub fn any_work_for_frame(&self, frame: u64) -> bool {
         self.renderers.read().unwrap().iter().any(|m| m.work_this_frame(frame))
     }
+    pub fn next_frame_for_work(&self, after: u64) -> Option<u64> {
+        self.renderers.read().unwrap().iter().flat_map(|m| m.next_frame_for_work(after)).min()
+    }
 }

-impl<S: SampleableSim> Renderer<S> for MultiRenderer<S> {
-    fn render_z_slice(&self, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+impl<S: AbstractSim> Renderer<S> for MultiRenderer<S> {
+    fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
         default_render_z_slice(self, state, z, measurements, config)
     }
-    fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
         if self.renderers.read().unwrap().len() != 0 {
             self.render_with_image(state, &RenderSteps::render(state, measurements, state.depth() / 2), measurements, config);
         }
     }
-    fn render_with_image(&self, state: &S, im: &RgbImage, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) {
+    fn render_with_image(&self, state: &S, im: &RgbImage, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
         for r in &*self.renderers.read().unwrap() {
             if r.work_this_frame(state.step_no()) {
                 r.renderer.render_with_image(state, im, measurements, config);
@@ -559,25 +600,28 @@ impl<S: SampleableSim> Renderer<S> for MultiRenderer<S> {
     }
 }

 #[derive(Serialize, Deserialize)]
-pub struct SerializedFrame<S=StaticSim> {
+pub struct SerializedFrame<S> {
     pub state: S,
     /// although not generally necessary to load the sim, saving the measurements is beneficial for
     /// post-processing.
-    pub measurements: Vec<Box<dyn AbstractMeasurement>>,
+    pub measurements: Vec<Measurement>,
 }

-impl<S: SampleableSim> SerializedFrame<S> {
-    pub fn to_static(self) -> SerializedFrame<StaticSim> {
+impl<S: AbstractSim> SerializedFrame<S> {
+    pub fn to_generic(self) -> SerializedFrame<GenericSim<S::Real>> {
         SerializedFrame {
-            state: SampleableSim::to_static(&self.state),
+            state: AbstractSim::to_generic(&self.state),
             measurements: self.measurements,
         }
     }
 }
+/// this serializes the simulation state plus measurements to disk.
+/// it can either convert the state to a generic, material-agnostic format (generic)
+/// or dump it as-is.
 pub struct SerializerRenderer {
     fmt_str: String,
-    prefer_static: bool,
+    prefer_generic: bool,
 }

 impl SerializerRenderer {
@@ -586,47 +630,50 @@ impl SerializerRenderer {
     pub fn new(fmt_str: &str) -> Self {
         Self {
             fmt_str: fmt_str.into(),
-            prefer_static: false,
+            prefer_generic: false,
         }
     }
-    /// Same as `new`, but cast to StaticSim before serializing. This yields a file that's easier
-    /// for post-processing, and may be smaller in size.
-    pub fn new_static(fmt_str: &str) -> Self {
+    /// Same as `new`, but cast to GenericSim before serializing. This yields a file that's easier
+    /// for post-processing.
+    pub fn new_generic(fmt_str: &str) -> Self {
         Self {
             fmt_str: fmt_str.into(),
-            prefer_static: true,
+            prefer_generic: true,
         }
     }
 }

 impl SerializerRenderer {
-    fn serialize<S: SampleableSim + Serialize>(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>]) {
+    fn serialize<S: AbstractSim + Serialize>(&self, state: &S, measurements: Vec<Measurement>) {
         let frame = SerializedFrame {
             state,
-            measurements: measurements.iter().cloned().collect(),
+            measurements,
         };
         let name = self.fmt_str.replace("{step_no}", &*frame.state.step_no().to_string());
-        let out = BufWriter::new(File::create(name).unwrap());
-        //serde_cbor::to_writer(out, &snap).unwrap();
+        // serialize to a temporary file -- in case we run out of disk space, etc.
+        let temp_name = format!("{}.incomplete", name);
+        let out = BufWriter::new(File::create(&temp_name).unwrap());
         bincode::serialize_into(out, &frame).unwrap();
+        // atomically complete the write.
+        std::fs::rename(temp_name, name).unwrap();
     }
-    pub fn try_load<S: SampleableSim + for <'a> Deserialize<'a>>(&self) -> Option<SerializedFrame<S>> {
+    pub fn try_load<S: AbstractSim + for <'a> Deserialize<'a>>(&self) -> Option<SerializedFrame<S>> {
         let mut reader = BufReader::new(File::open(&*self.fmt_str).ok()?);
         bincode::deserialize_from(&mut reader).ok()
     }
 }
impl<S: SampleableSim + Serialize> Renderer<S> for SerializerRenderer { impl<S: AbstractSim + Serialize> Renderer<S> for SerializerRenderer {
fn render_z_slice(&self, state: &S, z: u32, measurements: &[Box<dyn AbstractMeasurement>], config: RenderConfig) { fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
default_render_z_slice(self, state, z, measurements, config) default_render_z_slice(self, state, z, measurements, config)
} }
fn render(&self, state: &S, measurements: &[Box<dyn AbstractMeasurement>], _config: RenderConfig) { fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], _config: RenderConfig) {
if self.prefer_static { if self.prefer_generic {
self.serialize(&state.to_static(), measurements); self.serialize(&state.to_generic(), meas::eval_multiple(state, measurements));
} else { } else {
self.serialize(state, measurements); self.serialize(state, meas::eval_multiple(state, measurements));
} }
} }
} }
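The temp-file-then-rename trick in `serialize` generalizes to any crash-safe file write: readers never observe a half-written file, because the rename publishes the data atomically. A standalone sketch of the same pattern (the file name here is illustrative):

```rust
use std::fs::{self, File};
use std::io::{BufWriter, Write};

/// Write `bytes` to `path` without readers ever seeing a partial file:
/// the data first lands in a `.incomplete` sibling, then an atomic
/// rename publishes it. A crash mid-write leaves only the temp file.
fn write_atomically(path: &str, bytes: &[u8]) -> std::io::Result<()> {
    let temp = format!("{}.incomplete", path);
    let mut out = BufWriter::new(File::create(&temp)?);
    out.write_all(bytes)?;
    out.flush()?;
    // within one filesystem, rename replaces the target atomically
    fs::rename(&temp, path)?;
    Ok(())
}

fn main() -> std::io::Result<()> {
    write_atomically("frame_0.bin", b"fake frame data")?;
    assert_eq!(fs::read("frame_0.bin")?, b"fake frame data");
    fs::remove_file("frame_0.bin")
}
```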
@@ -661,12 +708,12 @@ impl CsvRenderer {
    }
}

impl<S: AbstractSim> Renderer<S> for CsvRenderer {
    fn render_z_slice(&self, state: &S, z: u32, measurements: &[&dyn AbstractMeasurement<S>], config: RenderConfig) {
        default_render_z_slice(self, state, z, measurements, config)
    }

    fn render(&self, state: &S, measurements: &[&dyn AbstractMeasurement<S>], _config: RenderConfig) {
        let row = meas::eval_multiple(state, measurements);
        let step = state.step_no();
        let mut lock = self.state.lock().unwrap();
        let mut writer = match lock.take().unwrap() {
@@ -700,13 +747,13 @@ impl<S: SampleableSim> Renderer<S> for CsvRenderer {
                file.set_len(0).unwrap();
                let mut writer = csv::Writer::from_writer(BufWriter::new(file));
                // write the header
                writer.write_record(row.iter().map(|m| m.name())).unwrap();
                writer
            }
        },
        CsvState::Writing(writer) => writer,
    };
    writer.write_record(row.iter().map(|m| m.machine_readable())).unwrap();
    writer.flush().unwrap();
    *lock = Some(CsvState::Writing(writer));
}

@@ -1,251 +0,0 @@
use serde::de::Deserializer;
use serde::ser::Serializer;
use serde::{Deserialize, Serialize};
use crate::mat::{AnisomorphicConductor, IsoConductorOr, IsomorphicConductor, Ferroxcube3R1MH, MaterialExt as _, MBFerromagnet, MBPgram, MHPgram, Static};
use crate::geom::{Index, Vec3, Vec3u};
/// hide the actual spirv backend structures inside a submodule to make their use/boundary clear.
mod ffi {
pub use spirv_backend::entry_points;
pub use spirv_backend::sim::SerializedSimMeta;
pub use spirv_backend::support::Optional;
pub use spirv_backend::mat::FullyGenericMaterial;
pub use coremem_types::mat::MBPgram;
}
// conversion traits for types defined cross-lib
pub trait IntoFfi {
type Ffi;
fn into_ffi(self) -> Self::Ffi;
}
pub trait IntoLib {
type Lib;
fn into_lib(self) -> Self::Lib;
}
macro_rules! identity {
($($param:ident,)* => $t:ty) => {
impl<$($param: IntoFfi),*> IntoFfi for $t {
type Ffi = $t;
fn into_ffi(self) -> Self::Ffi {
self
}
}
impl<$($param: IntoLib),*> IntoLib for $t {
type Lib = $t;
fn into_lib(self) -> Self::Lib {
self
}
}
};
}
// XXX: should work for any other lifetime, not just 'static
identity!(=> f32);
identity!(=> &'static str);
identity!(T0, T1, => (T0, T1));
identity!(=> Vec3u);
identity!(T, => Vec3<T>);
impl<L: IntoFfi> IntoFfi for Option<L>
where L::Ffi: Default
{
type Ffi = ffi::Optional<L::Ffi>;
fn into_ffi(self) -> Self::Ffi {
match self {
Some(s) => ffi::Optional::some(s.into_ffi()),
None => ffi::Optional::none(),
}
}
}
impl<F: Copy + IntoLib> IntoLib for ffi::Optional<F> {
type Lib = Option<F::Lib>;
fn into_lib(self) -> Self::Lib {
if self.is_some() {
Some(self.unwrap().into_lib())
} else {
None
}
}
}
impl IntoFfi for MBPgram<f32> {
type Ffi = ffi::MBPgram<f32>;
fn into_ffi(self) -> Self::Ffi {
Self::Ffi::new(self.b_start, self.b_end, self.max_m)
}
}
impl IntoLib for ffi::MBPgram<f32> {
type Lib = MBPgram<f32>;
fn into_lib(self) -> Self::Lib {
Self::Lib::new(self.b_start, self.b_end, self.max_m)
}
}
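The `IntoFfi`/`IntoLib` pair mirror each other across the crate boundary; for any type that exists on both sides, crossing and returning must be lossless. A round-trip sketch with hypothetical mirrored structs (`LibCurve`/`FfiCurve` are made up for illustration — in the real code the two halves live in different crates):

```rust
pub trait IntoFfi { type Ffi; fn into_ffi(self) -> Self::Ffi; }
pub trait IntoLib { type Lib; fn into_lib(self) -> Self::Lib; }

// hypothetical mirrored types: identical fields, different "crates"
#[derive(Debug, PartialEq)]
struct LibCurve { b_start: f32, b_end: f32 }
struct FfiCurve { b_start: f32, b_end: f32 }

impl IntoFfi for LibCurve {
    type Ffi = FfiCurve;
    fn into_ffi(self) -> FfiCurve { FfiCurve { b_start: self.b_start, b_end: self.b_end } }
}
impl IntoLib for FfiCurve {
    type Lib = LibCurve;
    fn into_lib(self) -> LibCurve { LibCurve { b_start: self.b_start, b_end: self.b_end } }
}

fn main() {
    let lib = LibCurve { b_start: 0.1, b_end: 0.5 };
    // crossing the boundary and back preserves the value
    let round_tripped = lib.into_ffi().into_lib();
    assert_eq!(round_tripped, LibCurve { b_start: 0.1, b_end: 0.5 });
}
```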
identity!( => MHPgram<f32>);
identity!( => Ferroxcube3R1MH);
identity!(R, M, => IsoConductorOr<R, M>);
#[derive(Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct FullyGenericMaterial {
pub conductivity: Vec3<f32>,
pub m_b_curve: Option<MBPgram<f32>>,
pub m_h_curve: Option<MHPgram<f32>>,
}
impl IntoFfi for FullyGenericMaterial {
type Ffi = ffi::FullyGenericMaterial;
fn into_ffi(self) -> Self::Ffi {
Self::Ffi {
conductivity: self.conductivity.into_ffi(),
m_b_curve: self.m_b_curve.into_ffi(),
m_h_curve: self.m_h_curve.into_ffi(),
}
}
}
impl IntoLib for ffi::FullyGenericMaterial {
type Lib = FullyGenericMaterial;
fn into_lib(self) -> Self::Lib {
Self::Lib {
conductivity: self.conductivity.into_lib(),
m_b_curve: self.m_b_curve.into_lib(),
m_h_curve: self.m_h_curve.into_lib(),
}
}
}
impl From<Static<f32>> for FullyGenericMaterial {
fn from(m: Static<f32>) -> Self {
FullyGenericMaterial {
conductivity: m.conductivity(),
.. Default::default()
}
}
}
impl From<AnisomorphicConductor<f32>> for FullyGenericMaterial {
fn from(m: AnisomorphicConductor<f32>) -> Self {
FullyGenericMaterial {
conductivity: m.conductivity(),
.. Default::default()
}
}
}
impl From<IsomorphicConductor<f32>> for FullyGenericMaterial {
fn from(m: IsomorphicConductor<f32>) -> Self {
FullyGenericMaterial {
conductivity: m.conductivity(),
.. Default::default()
}
}
}
impl From<MBFerromagnet<f32>> for FullyGenericMaterial {
fn from(m: MBFerromagnet<f32>) -> Self {
FullyGenericMaterial {
m_b_curve: Some(m.curve()),
.. Default::default()
}
}
}
impl From<MHPgram<f32>> for FullyGenericMaterial {
fn from(m: MHPgram<f32>) -> Self {
FullyGenericMaterial {
m_h_curve: Some(m),
.. Default::default()
}
}
}
impl From<Ferroxcube3R1MH> for FullyGenericMaterial {
fn from(m: Ferroxcube3R1MH) -> Self {
let curve: MHPgram<f32> = m.into();
curve.into()
}
}
// this is bitwise- and type-compatible with the spirv SimMeta, except we need serde traits
#[derive(Clone, Default, Serialize, Deserialize)]
pub struct SimMeta {
pub(crate) dim: Index,
pub(crate) inv_feature_size: f32,
pub(crate) time_step: f32,
pub(crate) feature_size: f32,
}
impl IntoFfi for SimMeta {
type Ffi = ffi::SerializedSimMeta;
fn into_ffi(self) -> Self::Ffi {
Self::Ffi {
dim: self.dim.0.into_ffi(),
inv_feature_size: self.inv_feature_size,
time_step: self.time_step,
feature_size: self.feature_size,
}
}
}
/// Store the FFI form in memory, but serialize via the lib form.
#[derive(Clone, Default, PartialEq)]
pub struct Remote<F>(F);
impl<F> Remote<F> {
pub fn into_inner(self) -> F {
self.0
}
}
impl<L: IntoFfi> From<L> for Remote<L::Ffi> {
fn from(l: L) -> Self {
Remote(l.into_ffi())
}
}
impl<F> std::ops::Deref for Remote<F> {
type Target = F;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl<F> Serialize for Remote<F>
where F: Clone + IntoLib,
F::Lib: Serialize,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let local = self.0.clone().into_lib();
local.serialize(serializer)
}
}
impl<'de, F> Deserialize<'de> for Remote<F>
where F: IntoLib,
F::Lib: Deserialize<'de> + IntoFfi<Ffi=F>,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let local: F::Lib = Deserialize::deserialize(deserializer)?;
Ok(Remote(local.into_ffi()))
}
}
// FUNCTION BINDINGS
pub fn entry_points<L>() -> Option<(&'static str, &'static str)>
where
L: IntoFfi,
L::Ffi: 'static
{
ffi::entry_points::<L::Ffi>().into_lib()
}

@@ -0,0 +1,49 @@
use crate::diagnostics::SyncDiagnostics;
use coremem_cross::mat::Material;
use coremem_cross::real::Real;
use coremem_cross::step::{SimMeta, StepEContext, StepHContext};
use coremem_cross::vec::{Vec3, Vec3u};
use super::SimBackend;
#[derive(Default)]
pub struct CpuBackend;
impl<R: Real, M: Material<R>> SimBackend<R, M> for CpuBackend {
fn step_n(
&mut self,
_diag: &SyncDiagnostics,
meta: SimMeta<R>,
mat: &[M],
stim_e: &[Vec3<R>],
stim_h: &[Vec3<R>],
e: &mut [Vec3<R>],
h: &mut [Vec3<R>],
m: &mut [Vec3<R>],
num_steps: u32,
) {
for _ in 0..num_steps {
// step E field
apply_all_cells(meta.dim(), |idx| {
StepEContext::step_flat_view(meta, mat, stim_e, e, h, idx);
});
// step H field
apply_all_cells(meta.dim(), |idx| {
StepHContext::step_flat_view(meta, mat, stim_h, e, h, m, idx);
});
}
}
}
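The two passes per step follow the usual FDTD leapfrog: E is advanced from the curl of H, then H from the curl of E, staggered by half a time step. In the standard source-free form (material, stimulus, and M terms are folded in by `StepEContext`/`StepHContext`, so this is only the skeleton):

```latex
\mathbf{E}^{n+1} = \mathbf{E}^{n} + \frac{\Delta t}{\varepsilon}\,\nabla\times\mathbf{H}^{n+1/2},
\qquad
\mathbf{H}^{n+3/2} = \mathbf{H}^{n+1/2} - \frac{\Delta t}{\mu}\,\nabla\times\mathbf{E}^{n+1}
```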
fn apply_all_cells<F: FnMut(Vec3u)>(dim: Vec3u, mut f: F) {
for z in 0..dim.z() {
for y in 0..dim.y() {
for x in 0..dim.x() {
f(Vec3u::new(x, y, z));
}
}
}
}
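`apply_all_cells` visits x fastest, then y, then z — which lines up with a flat layout whose linear index is `x + dim.x*(y + dim.y*z)` (an assumption about the backing storage, but consistent with the flat slices passed to `step_flat_view`). A standalone sketch:

```rust
// minimal stand-ins for Vec3u and apply_all_cells
#[derive(Clone, Copy)]
struct Vec3u { x: u32, y: u32, z: u32 }

fn apply_all_cells<F: FnMut(Vec3u)>(dim: Vec3u, mut f: F) {
    for z in 0..dim.z {
        for y in 0..dim.y {
            for x in 0..dim.x {
                f(Vec3u { x, y, z });
            }
        }
    }
}

fn main() {
    let dim = Vec3u { x: 2, y: 2, z: 2 };
    let mut linear = Vec::new();
    apply_all_cells(dim, |idx| {
        // x varies fastest, so flattening recovers 0..volume in order
        linear.push(idx.x + dim.x * (idx.y + dim.y * idx.z));
    });
    assert_eq!(linear, (0..8).collect::<Vec<u32>>());
}
```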

@@ -0,0 +1,509 @@
use futures::FutureExt as _;
use log::info;
use std::borrow::Cow;
use std::num::NonZeroU64;
use std::time::Duration;
use wgpu;
use wgpu::util::DeviceExt as _;
use crate::diagnostics::SyncDiagnostics;
use coremem_cross::vec::{Vec3, Vec3u};
use coremem_cross::step::SimMeta;
use spirv_backend::HasEntryPoints;
use super::SimBackend;
#[derive(Default)]
pub struct WgpuBackend {
handles: Option<(&'static str /* step_h */, &'static str /* step_e */, WgpuHandles)>,
}
struct WgpuHandles {
step_bind_group_layout: wgpu::BindGroupLayout,
step_e_pipeline: wgpu::ComputePipeline,
step_h_pipeline: wgpu::ComputePipeline,
device: wgpu::Device,
queue: wgpu::Queue,
}
impl WgpuHandles {
fn open<R, M: HasEntryPoints<R>>(dim: Vec3u) -> Self {
info!("WgpuHandles::open({})", dim);
use std::mem::size_of;
let volume = dim.product_sum_usize() as u64;
let max_elem_size = size_of::<M>().max(size_of::<Vec3<R>>());
let max_array_size = volume * max_elem_size as u64;
let max_buf_size = max_array_size + 0x1000; // allow some overhead
let (device, queue) = futures::executor::block_on(open_device(max_buf_size));
let shader_binary = get_shader();
let shader_module = unsafe { device.create_shader_module_spirv(&shader_binary) };
let (step_bind_group_layout, step_h_pipeline, step_e_pipeline) = make_pipelines(
&device, &shader_module, M::step_h(), M::step_e()
);
WgpuHandles {
step_bind_group_layout,
step_h_pipeline,
step_e_pipeline,
device,
queue,
}
}
}
// TODO: these bounds aren't 100% right. we're sending R and M over to the GPU by a bitwise copy.
// that probably means the types should be Send + Copy
impl<R: Copy, M: Send + Sync + HasEntryPoints<R>> SimBackend<R, M> for WgpuBackend {
fn step_n(
&mut self,
diag: &SyncDiagnostics,
meta: SimMeta<R>,
mat: &[M],
stim_cpu_e: &[Vec3<R>],
stim_cpu_h: &[Vec3<R>],
e: &mut [Vec3<R>],
h: &mut [Vec3<R>],
m: &mut [Vec3<R>],
num_steps: u32,
) {
let dim = meta.dim();
let field_bytes = dim.product_sum() as usize * std::mem::size_of::<Vec3<f32>>();
let (step_h, step_e, handles) = self.handles.get_or_insert_with(|| (
M::step_h(),
M::step_e(),
WgpuHandles::open::<R, M>(dim)
));
// if device is opened, make sure we're open for the right types
assert_eq!(*step_h, M::step_h());
assert_eq!(*step_e, M::step_e());
let device = &handles.device;
let queue = &handles.queue;
let step_bind_group_layout = &handles.step_bind_group_layout;
let step_e_pipeline = &handles.step_e_pipeline;
let step_h_pipeline = &handles.step_h_pipeline;
let timestamp_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("timestamps"),
// each timestamp is 8 bytes, and we do 4 per step
size: 8 * 4 * num_steps as u64,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: true,
});
timestamp_buffer.unmap();
let sim_meta_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("gpu-side simulation metadata"),
contents: to_bytes(&[meta][..]),
usage: wgpu::BufferUsages::STORAGE,
});
let stim_e_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("gpu-side stimulus e field"),
contents: to_bytes(stim_cpu_e),
usage: wgpu::BufferUsages::STORAGE
});
let stim_h_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("gpu-side stimulus h field"),
contents: to_bytes(stim_cpu_h),
usage: wgpu::BufferUsages::STORAGE
});
let mat_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("gpu-side materials matrix"),
contents: to_bytes(mat),
// read-only on the GPU; never copied back to the CPU
usage: wgpu::BufferUsages::STORAGE,
});
let e_field_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("gpu-side in/out e field"),
contents: to_bytes(e),
usage: wgpu::BufferUsages::STORAGE.union(wgpu::BufferUsages::COPY_SRC),
});
let h_field_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("gpu-side in/out h field"),
contents: to_bytes(h),
// Can be used by the GPU and copied back to the CPU
usage: wgpu::BufferUsages::STORAGE.union(wgpu::BufferUsages::COPY_SRC),
});
let m_field_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("gpu-side in/out m field"),
contents: to_bytes(m),
// Can be used by the GPU and copied back to the CPU
usage: wgpu::BufferUsages::STORAGE.union(wgpu::BufferUsages::COPY_SRC),
});
let e_readback_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("cpu-side copy of e output buffer"),
size: field_bytes as wgpu::BufferAddress,
// Can be read to the CPU, and can be copied from the shader's storage buffer
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let h_readback_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("cpu-side copy of h output buffer"),
size: field_bytes as wgpu::BufferAddress,
// Can be read to the CPU, and can be copied from the shader's storage buffer
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let m_readback_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("cpu-side copy of m output buffer"),
size: field_bytes as wgpu::BufferAddress,
// Can be read to the CPU, and can be copied from the shader's storage buffer
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &step_bind_group_layout,
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: sim_meta_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 1,
resource: stim_e_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 2,
resource: stim_h_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 3,
resource: mat_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 4,
resource: e_field_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 5,
resource: h_field_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 6,
resource: m_field_buffer.as_entire_binding(),
},
],
});
let queries = device.create_query_set(&wgpu::QuerySetDescriptor {
label: None,
count: 4 * num_steps,
ty: wgpu::QueryType::Timestamp,
});
let workgroups = ((dim.x()+3) / 4, (dim.y()+3) / 4, (dim.z()+3) / 4);
let mut encoder =
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
for step in 0..num_steps {
{
let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
cpass.set_bind_group(0, &bind_group, &[]);
cpass.set_pipeline(&step_e_pipeline);
cpass.write_timestamp(&queries, 4*step);
cpass.dispatch(workgroups.0, workgroups.1, workgroups.2);
cpass.write_timestamp(&queries, 4*step + 1);
}
{
let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
cpass.set_bind_group(0, &bind_group, &[]);
cpass.set_pipeline(&step_h_pipeline);
cpass.write_timestamp(&queries, 4*step + 2);
cpass.dispatch(workgroups.0, workgroups.1, workgroups.2);
cpass.write_timestamp(&queries, 4*step + 3);
}
}
encoder.copy_buffer_to_buffer(
&e_field_buffer,
0,
&e_readback_buffer,
0,
field_bytes as u64,
);
encoder.copy_buffer_to_buffer(
&h_field_buffer,
0,
&h_readback_buffer,
0,
field_bytes as u64,
);
encoder.copy_buffer_to_buffer(
&m_field_buffer,
0,
&m_readback_buffer,
0,
field_bytes as u64,
);
encoder.resolve_query_set(&queries, 0..4*num_steps, &timestamp_buffer, 0);
diag.instrument_write_device(move || {
queue.submit(Some(encoder.finish()));
});
let e_readback_slice = e_readback_buffer.slice(..);
let e_readback_future = e_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
e.copy_from_slice(unsafe {
from_bytes(e_readback_slice.get_mapped_range().as_ref())
});
e_readback_buffer.unmap();
});
let h_readback_slice = h_readback_buffer.slice(..);
let h_readback_future = h_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
h.copy_from_slice(unsafe {
from_bytes(h_readback_slice.get_mapped_range().as_ref())
});
h_readback_buffer.unmap();
});
let m_readback_slice = m_readback_buffer.slice(..);
let m_readback_future = m_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
m.copy_from_slice(unsafe {
from_bytes(m_readback_slice.get_mapped_range().as_ref())
});
m_readback_buffer.unmap();
});
let timestamp_period = queue.get_timestamp_period();
let timestamp_readback_slice = timestamp_buffer.slice(..);
let timestamp_readback_future = timestamp_readback_slice.map_async(wgpu::MapMode::Read).then(|_| async {
let (e_time, h_time) = {
let mapped = timestamp_readback_slice.get_mapped_range();
let timings: &[u64] = unsafe {
from_bytes(mapped.as_ref())
};
let (mut e_time, mut h_time) = (0, 0);
for frame in timings.chunks(4) {
e_time += frame[1] - frame[0];
h_time += frame[3] - frame[2];
}
(
Duration::from_nanos((e_time as f64 * timestamp_period as f64) as u64),
Duration::from_nanos((h_time as f64 * timestamp_period as f64) as u64),
)
};
timestamp_buffer.unmap();
diag.record_step_device(e_time + h_time);
});
// optimization note: it may be possible to use `WaitForSubmission`
// and copy data to/from even as the GPU begins executing the next job.
device.poll(wgpu::Maintain::Wait);
diag.instrument_read_device(move || {
futures::executor::block_on(futures::future::join(
e_readback_future, futures::future::join(
h_readback_future, futures::future::join(
m_readback_future, timestamp_readback_future
)
)
));
});
}
}
/// Convert an arbitrary slice into a byte slice
fn to_bytes<T>(slice: &[T]) -> &[u8] {
unsafe {
std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * std::mem::size_of::<T>())
}
}
/// Convert a byte slice into a T slice
unsafe fn from_bytes<T>(slice: &[u8]) -> &[T] {
let elem_size = std::mem::size_of::<T>();
let new_len = slice.len() / elem_size;
assert_eq!(new_len * elem_size, slice.len());
std::slice::from_raw_parts(slice.as_ptr() as *const T, new_len)
}
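`to_bytes`/`from_bytes` work because the byte slice inherits the pointer (and hence the alignment) of the source allocation, so a round trip through the same buffer is sound. A self-contained check of the same reinterpretation technique:

```rust
/// view any slice as raw bytes
fn to_bytes<T>(slice: &[T]) -> &[u8] {
    unsafe {
        std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * std::mem::size_of::<T>())
    }
}

/// Safety: `slice` must be aligned for `T` and contain valid `T` bit patterns.
unsafe fn from_bytes<T>(slice: &[u8]) -> &[T] {
    let elem_size = std::mem::size_of::<T>();
    let new_len = slice.len() / elem_size;
    assert_eq!(new_len * elem_size, slice.len());
    std::slice::from_raw_parts(slice.as_ptr() as *const T, new_len)
}

fn main() {
    let original: Vec<u32> = vec![1, 2, 3, 0xdead_beef];
    let bytes = to_bytes(&original);
    assert_eq!(bytes.len(), 16); // 4 elements * 4 bytes each
    // the byte pointer still carries the original u32 alignment
    let recovered: &[u32] = unsafe { from_bytes(bytes) };
    assert_eq!(recovered, &original[..]);
}
```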
/// Loads the shader
fn get_shader() -> wgpu::ShaderModuleDescriptorSpirV<'static> {
let data = spirv_backend_runner::spirv_module();
let spirv = Cow::Owned(wgpu::util::make_spirv_raw(&data).into_owned());
wgpu::ShaderModuleDescriptorSpirV {
label: None,
source: spirv,
}
}
async fn open_device(max_buf_size: u64) -> (wgpu::Device, wgpu::Queue) {
// based on rust-gpu/examples/runners/wgpu/src/compute.rs:start_internal
let instance = wgpu::Instance::new(wgpu::Backends::PRIMARY);
info!("open_device: got instance");
let adapter = instance
.request_adapter(&wgpu::RequestAdapterOptions {
power_preference: wgpu::PowerPreference::HighPerformance,
force_fallback_adapter: false,
compatible_surface: None,
})
.await
.expect("Failed to find an appropriate adapter");
info!("open_device: got adapter");
// XXX not all adapters will support non-default limits, and it could
// cause perf degradations even on the ones that do. May want to consider
// folding some buffers together to avoid this.
let mut limits = wgpu::Limits::default();
//limits.max_bind_groups = 5;
//limits.max_dynamic_storage_buffers_per_pipeline_layout = 5;
limits.max_storage_buffers_per_shader_stage = 7;
//limits.max_storage_buffer_binding_size = 128 MiB.
//limits.max_storage_buffer_binding_size = 1024 * (1 << 20);
limits.max_storage_buffer_binding_size = max_buf_size.try_into().unwrap();
let (device, queue) = adapter
.request_device(
&wgpu::DeviceDescriptor {
label: None,
features: (
wgpu::Features::empty()
.union(wgpu::Features::SPIRV_SHADER_PASSTHROUGH)
.union(wgpu::Features::TIMESTAMP_QUERY)
),
limits,
},
None,
)
.await
.expect("Failed to create device");
info!("open_device: got device");
(device, queue)
}
fn make_pipelines(
device: &wgpu::Device,
shader_module: &wgpu::ShaderModule,
entry_step_h: &'static str,
entry_step_e: &'static str
) -> (
wgpu::BindGroupLayout, wgpu::ComputePipeline, wgpu::ComputePipeline
) {
let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: None,
entries: &[
wgpu::BindGroupLayoutEntry {
// meta
binding: 0,
count: None,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
has_dynamic_offset: false,
min_binding_size: Some(NonZeroU64::new(1).unwrap()),
ty: wgpu::BufferBindingType::Storage { read_only: true },
},
},
wgpu::BindGroupLayoutEntry {
// stimulus(e)
binding: 1,
count: None,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
has_dynamic_offset: false,
min_binding_size: Some(NonZeroU64::new(1).unwrap()),
ty: wgpu::BufferBindingType::Storage { read_only: true },
},
},
wgpu::BindGroupLayoutEntry {
// stimulus(h)
binding: 2,
count: None,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
has_dynamic_offset: false,
min_binding_size: Some(NonZeroU64::new(1).unwrap()),
ty: wgpu::BufferBindingType::Storage { read_only: true },
},
},
wgpu::BindGroupLayoutEntry {
// materials
binding: 3,
count: None,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
has_dynamic_offset: false,
min_binding_size: Some(NonZeroU64::new(1).unwrap()),
ty: wgpu::BufferBindingType::Storage { read_only: true },
},
},
wgpu::BindGroupLayoutEntry {
// e field
binding: 4,
count: None,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
has_dynamic_offset: false,
min_binding_size: Some(NonZeroU64::new(1).unwrap()),
ty: wgpu::BufferBindingType::Storage { read_only: false },
},
},
wgpu::BindGroupLayoutEntry {
// h field
binding: 5,
count: None,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
has_dynamic_offset: false,
min_binding_size: Some(NonZeroU64::new(1).unwrap()),
ty: wgpu::BufferBindingType::Storage { read_only: false },
},
},
wgpu::BindGroupLayoutEntry {
// m field
binding: 6,
count: None,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
has_dynamic_offset: false,
min_binding_size: Some(NonZeroU64::new(1).unwrap()),
ty: wgpu::BufferBindingType::Storage { read_only: false },
},
},
],
});
let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: None,
bind_group_layouts: &[&bind_group_layout],
push_constant_ranges: &[],
});
let compute_step_h_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: None,
layout: Some(&pipeline_layout),
module: shader_module,
entry_point: entry_step_h,
});
let compute_step_e_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: None,
layout: Some(&pipeline_layout),
module: shader_module,
entry_point: entry_step_e,
});
(bind_group_layout, compute_step_h_pipeline, compute_step_e_pipeline)
}

@@ -1,381 +0,0 @@
use crate::real::*;
use crate::geom::{Meters, Region, Vec3};
use rand;
type Fields = (Vec3<f32>, Vec3<f32>);
pub trait AbstractStimulus: Sync {
// TODO: might be cleaner to return some `Fields` type instead of a tuple
/// Return the (E, H) field which should be added PER-SECOND at the provided position/time.
fn at(&self, t_sec: f32, pos: Meters) -> Fields;
}
// impl<T: AbstractStimulus> AbstractStimulus for &T {
// fn at(&self, t_sec: f32, pos: Meters) -> Vec3 {
// (*self).at(t_sec, pos)
// }
// }
impl<T: AbstractStimulus> AbstractStimulus for Vec<T> {
fn at(&self, t_sec: f32, pos: Meters) -> Fields {
let (mut e, mut h) = Fields::default();
for i in self {
let (de, dh) = i.at(t_sec, pos);
e += de;
h += dh;
}
(e, h)
}
}
impl AbstractStimulus for Box<dyn AbstractStimulus> {
fn at(&self, t_sec: f32, pos: Meters) -> Fields {
(**self).at(t_sec, pos)
}
}
pub struct NoopStimulus;
impl AbstractStimulus for NoopStimulus {
fn at(&self, _t_sec: f32, _pos: Meters) -> Fields {
Fields::default()
}
}
pub struct UniformStimulus {
e: Vec3<f32>,
h: Vec3<f32>,
}
impl UniformStimulus {
pub fn new(e: Vec3<f32>, h: Vec3<f32>) -> Self {
Self { e, h }
}
pub fn new_e(e: Vec3<f32>) -> Self {
Self::new(e, Vec3::zero())
}
}
impl AbstractStimulus for UniformStimulus {
fn at(&self, t_sec: f32, _pos: Meters) -> Fields {
TimeVarying3::at(self, t_sec)
}
}
impl TimeVarying for UniformStimulus {}
impl TimeVarying3 for UniformStimulus {
fn at(&self, _t_sec: f32) -> Fields {
(self.e, self.h)
}
}
pub struct RngStimulus {
seed: u64,
e_scale: f32,
h_scale: f32,
}
impl RngStimulus {
pub fn new(seed: u64) -> Self {
Self { seed, e_scale: 1e15, h_scale: 1e15 }
}
pub fn new_e(seed: u64) -> Self {
Self { seed, e_scale: 1e15, h_scale: 0.0 }
}
fn gen(&self, t_sec: f32, pos: Meters, scale: f32, salt: u64) -> Vec3<f32> {
use rand::{Rng as _, SeedableRng as _};
let seed = self.seed
^ (t_sec.to_bits() as u64)
^ ((pos.x().to_bits() as u64) << 8)
^ ((pos.y().to_bits() as u64) << 16)
^ ((pos.z().to_bits() as u64) << 24)
^ (salt << 32);
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
Vec3::new(
rng.gen_range(-scale..=scale),
rng.gen_range(-scale..=scale),
rng.gen_range(-scale..=scale),
)
}
}
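The seed mixing in `RngStimulus::gen` makes the noise a pure function of (seed, time, position, salt), so re-running a simulation reproduces the exact same stimulus. That determinism can be sketched with a toy LCG standing in for `StdRng` (the mixing mirrors the code above; the LCG constants are illustrative, not what `rand` uses):

```rust
fn mix_seed(seed: u64, t_sec: f32, pos: (f32, f32, f32), salt: u64) -> u64 {
    seed ^ (t_sec.to_bits() as u64)
        ^ ((pos.0.to_bits() as u64) << 8)
        ^ ((pos.1.to_bits() as u64) << 16)
        ^ ((pos.2.to_bits() as u64) << 24)
        ^ (salt << 32)
}

// toy LCG in place of rand::rngs::StdRng (bijective: odd multiplier)
fn lcg_next(state: u64) -> u64 {
    state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407)
}

fn main() {
    let pos = (0.5f32, 1.0, 2.0);
    // same inputs -> same stimulus: the noise is reproducible across runs
    let a = lcg_next(mix_seed(42, 1e-9, pos, 0));
    let b = lcg_next(mix_seed(42, 1e-9, pos, 0));
    assert_eq!(a, b);
    // a different salt decorrelates the E and H streams
    let c = lcg_next(mix_seed(42, 1e-9, pos, 0x7de3));
    assert_ne!(a, c);
}
```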
impl AbstractStimulus for RngStimulus {
fn at(&self, t_sec: f32, pos: Meters) -> Fields {
(self.gen(t_sec, pos, self.e_scale, 0), self.gen(t_sec, pos, self.h_scale, 0x7de3))
}
}
/// Apply a time-varying stimulus uniformly across some region
#[derive(Clone)]
pub struct Stimulus<R, T> {
region: R,
stim: T,
}
impl<R, T> Stimulus<R, T> {
pub fn new(region: R, stim: T) -> Self {
Self {
region, stim
}
}
}
impl<R: Region + Sync, T: TimeVarying3 + Sync> AbstractStimulus for Stimulus<R, T> {
fn at(&self, t_sec: f32, pos: Meters) -> Fields {
if self.region.contains(pos) {
self.stim.at(t_sec)
} else {
Fields::default()
}
}
}
/// Apply a time-varying stimulus across some region.
/// The stimulus seen at each point is based on its angle about the specified ray.
#[derive(Clone)]
pub struct CurlStimulus<R, T> {
region: R,
stim: T,
center: Meters,
axis: Meters,
}
impl<R, T> CurlStimulus<R, T> {
pub fn new(region: R, stim: T, center: Meters, axis: Meters) -> Self {
Self { region, stim, center, axis }
}
}
impl<R: Region + Sync, T: TimeVarying1 + Sync> AbstractStimulus for CurlStimulus<R, T> {
fn at(&self, t_sec: f32, pos: Meters) -> Fields {
if self.region.contains(pos) {
let (amt_e, amt_h) = self.stim.at(t_sec);
let from_center_to_point = *pos - *self.center;
let rotational = from_center_to_point.cross(*self.axis);
let impulse_e = rotational.with_mag(amt_e.cast());
let impulse_h = rotational.with_mag(amt_h.cast());
(impulse_e, impulse_h)
} else {
Fields::default()
}
}
}
pub trait TimeVarying: Sized {
fn shifted(self, new_start: f32) -> Shifted<Self> {
Shifted::new(self, new_start)
}
fn gated(self, from: f32, to: f32) -> Gated<Self> {
Gated::new(self, from, to)
}
}
pub trait TimeVarying1: TimeVarying {
/// Retrieve the (E, H) impulse to apply PER-SECOND at the provided time (in seconds).
fn at(&self, t_sec: f32) -> (f32, f32);
}
pub trait TimeVarying3: TimeVarying {
/// Retrieve the (E, H) impulse to apply PER-SECOND at the provided time (in seconds).
fn at(&self, t_sec: f32) -> Fields;
}
// assumed to represent the E field
impl TimeVarying for f32 {}
impl TimeVarying1 for f32 {
fn at(&self, _t_sec: f32) -> (f32, f32) {
(*self, 0.0)
}
}
/// E field which changes magnitude sinusoidally as a function of t
#[derive(Clone)]
pub struct Sinusoid<A> {
amp: A,
omega: f32,
}
pub type Sinusoid1 = Sinusoid<f32>;
pub type Sinusoid3 = Sinusoid<Vec3<f32>>;
impl<A> Sinusoid<A> {
pub fn new(amp: A, freq: f32) -> Self {
Self {
amp,
omega: freq * f32::two_pi(),
}
}
pub fn from_wavelength(amp: A, lambda: f32) -> Self {
Self::new(amp, 1.0/lambda)
}
pub fn freq(&self) -> f32 {
self.omega / f32::two_pi()
}
pub fn wavelength(&self) -> f32 {
1.0 / self.freq()
}
pub fn one_cycle(self) -> Gated<Self> {
let wl = self.wavelength();
Gated::new(self, 0.0, wl)
}
pub fn half_cycle(self) -> Gated<Self> {
let wl = self.wavelength();
Gated::new(self, 0.0, 0.5 * wl)
}
}
impl<A> TimeVarying for Sinusoid<A> {}
impl TimeVarying1 for Sinusoid1 {
fn at(&self, t_sec: f32) -> (f32, f32) {
(
self.amp * (t_sec * self.omega).sin(),
0.0,
)
}
}
impl TimeVarying3 for Sinusoid3 {
fn at(&self, t_sec: f32) -> Fields {
(
self.amp * (t_sec * self.omega).sin(),
Vec3::zero(),
)
}
}
/// E field with magnitude that decays exponentially over t.
#[derive(Clone)]
pub struct Exp<A> {
amp: A,
tau: f32,
}
pub type Exp1 = Exp<f32>;
pub type Exp3 = Exp<Vec3<f32>>;
impl<A> Exp<A> {
pub fn new(amp: A, half_life: f32) -> Self {
let tau = std::f32::consts::LN_2/half_life;
Self { amp, tau }
}
pub fn new_at(amp: A, start: f32, half_life: f32) -> Shifted<Gated<Self>> {
Self::new(amp, half_life)
.gated(0.0, half_life*100.0)
.shifted(start)
}
}
impl<A> TimeVarying for Exp<A> {}
impl TimeVarying1 for Exp1 {
fn at(&self, t_sec: f32) -> (f32, f32) {
(
self.amp * (t_sec * -self.tau).exp(),
0.0,
)
}
}
impl TimeVarying3 for Exp3 {
fn at(&self, t_sec: f32) -> Fields {
(
self.amp * (t_sec * -self.tau).exp(),
Vec3::zero(),
)
}
}
#[derive(Clone)]
pub struct Gated<T> {
inner: T,
start: f32,
end: f32,
}
impl<T> Gated<T> {
pub fn new(inner: T, start: f32, end: f32) -> Self {
Self { inner, start, end }
}
}
impl<T> TimeVarying for Gated<T> {}
impl<T: TimeVarying1> TimeVarying1 for Gated<T> {
fn at(&self, t_sec: f32) -> (f32, f32) {
if (self.start..self.end).contains(&t_sec) {
self.inner.at(t_sec)
} else {
Default::default()
}
}
}
impl<T: TimeVarying3> TimeVarying3 for Gated<T> {
fn at(&self, t_sec: f32) -> Fields {
if (self.start..self.end).contains(&t_sec) {
self.inner.at(t_sec)
} else {
Default::default()
}
}
}
#[derive(Clone)]
pub struct Shifted<T> {
inner: T,
start_at: f32,
}
impl<T> Shifted<T> {
pub fn new(inner: T, start_at: f32) -> Self {
Self { inner, start_at }
}
}
impl<T> TimeVarying for Shifted<T> {}
impl<T: TimeVarying1> TimeVarying1 for Shifted<T> {
fn at(&self, t_sec: f32) -> (f32, f32) {
self.inner.at(t_sec - self.start_at)
}
}
impl<T: TimeVarying3> TimeVarying3 for Shifted<T> {
fn at(&self, t_sec: f32) -> Fields {
self.inner.at(t_sec - self.start_at)
}
}
#[cfg(test)]
mod test {
use super::*;
macro_rules! assert_approx_eq {
($x:expr, $e:expr, $h:expr) => {
let x = $x;
let e = $e;
let h = $h;
let diff_e = (x.0 - e).mag();
assert!(diff_e <= 0.001, "{:?} != {:?}", x, e);
let diff_h = (x.1 - h).mag();
assert!(diff_h <= 0.001, "{:?} != {:?}", x, h);
}
}
#[test]
fn sinusoid3() {
let s = Sinusoid3::new(Vec3::new(10.0, 1.0, -100.0), 1000.0);
assert_eq!(s.at(0.0), (Vec3::zero(), Vec3::zero()));
assert_approx_eq!(s.at(0.00025),
Vec3::new(10.0, 1.0, -100.0), Vec3::zero()
);
assert_approx_eq!(s.at(0.00050), Vec3::zero(), Vec3::zero());
assert_approx_eq!(s.at(0.00075), Vec3::new(-10.0, -1.0, 100.0), Vec3::zero());
}
#[test]
fn sinusoid3_from_wavelength() {
let s = Sinusoid3::from_wavelength(Vec3::new(10.0, 1.0, -100.0), 0.001);
assert_eq!(s.at(0.0), (Vec3::zero(), Vec3::zero()));
assert_approx_eq!(s.at(0.00025), Vec3::new(10.0, 1.0, -100.0), Vec3::zero());
assert_approx_eq!(s.at(0.00050), Vec3::zero(), Vec3::zero());
assert_approx_eq!(s.at(0.00075), Vec3::new(-10.0, -1.0, 100.0), Vec3::zero());
}
}


@@ -0,0 +1,123 @@
//! supporting types for basic Stimulus trait/impls
use crate::real::Real;
use crate::cross::vec::Vec3;
/// field densities
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Fields<R> {
pub e: Vec3<R>,
pub h: Vec3<R>,
}
impl<R> Fields<R> {
pub fn new_eh(e: Vec3<R>, h: Vec3<R>) -> Self {
Self { e, h }
}
}
impl<R: Real> Fields<R> {
pub fn e(&self) -> Vec3<R> {
self.e
}
pub fn h(&self) -> Vec3<R> {
self.h
}
pub fn new_e(e: Vec3<R>) -> Self {
Self::new_eh(e, Vec3::zero())
}
pub fn new_h(h: Vec3<R>) -> Self {
Self::new_eh(Vec3::zero(), h)
}
pub fn elem_mul(self, other: FieldMags<R>) -> Fields<R> {
Fields {
e: self.e * other.e,
h: self.h * other.h,
}
}
}
impl<R: Real> std::ops::AddAssign for Fields<R> {
fn add_assign(&mut self, other: Self) {
self.e += other.e;
self.h += other.h;
}
}
impl<R: Real> std::ops::Add for Fields<R> {
type Output = Self;
fn add(mut self, other: Self) -> Self::Output {
self += other;
self
}
}
impl<R: Real> std::ops::Mul<R> for Fields<R> {
type Output = Self;
fn mul(self, scale: R) -> Self::Output {
Fields {
e: self.e * scale,
h: self.h * scale,
}
}
}
/// field magnitude densities (really, signed magnitude)
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct FieldMags<R> {
pub e: R,
pub h: R,
}
impl<R: Real> std::ops::AddAssign for FieldMags<R> {
fn add_assign(&mut self, other: Self) {
self.e += other.e;
self.h += other.h;
}
}
impl<R: Real> std::ops::Add for FieldMags<R> {
type Output = Self;
fn add(mut self, other: Self) -> Self::Output {
self += other;
self
}
}
impl<R: Real> std::ops::Mul<R> for FieldMags<R> {
type Output = Self;
fn mul(self, scale: R) -> Self::Output {
FieldMags {
e: self.e * scale,
h: self.h * scale,
}
}
}
impl<R> FieldMags<R> {
pub fn new_eh(e: R, h: R) -> Self {
Self { e, h }
}
}
impl<R: Real> FieldMags<R> {
pub fn e(&self) -> R {
self.e
}
pub fn h(&self) -> R {
self.h
}
pub fn new_e(e: R) -> Self {
Self::new_eh(e, R::zero())
}
pub fn new_h(h: R) -> Self {
Self::new_eh(R::zero(), h)
}
pub fn elem_mul(self, other: Self) -> Self {
FieldMags {
e: self.e * other.e,
h: self.h * other.h,
}
}
}
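the `Fields`/`FieldMags` pair above separates "a vector field sample" from "a scalar modulation of it", and `elem_mul` is the bridge between them. a minimal, self-contained f32 sketch of that arithmetic (the array-based types here are simplified stand-ins, not the crate's generic `Vec3<R>` versions):

```rust
// Simplified stand-ins for the crate's Fields / FieldMags:
// Fields adds component-wise, and elem_mul scales the E and H
// components independently by a pair of signed magnitudes.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
struct Fields { e: [f32; 3], h: [f32; 3] }

#[derive(Clone, Copy, Debug, Default, PartialEq)]
struct FieldMags { e: f32, h: f32 }

impl Fields {
    fn add(self, o: Fields) -> Fields {
        Fields {
            e: [self.e[0] + o.e[0], self.e[1] + o.e[1], self.e[2] + o.e[2]],
            h: [self.h[0] + o.h[0], self.h[1] + o.h[1], self.h[2] + o.h[2]],
        }
    }
    fn elem_mul(self, m: FieldMags) -> Fields {
        Fields {
            e: [self.e[0] * m.e, self.e[1] * m.e, self.e[2] * m.e],
            h: [self.h[0] * m.h, self.h[1] * m.h, self.h[2] * m.h],
        }
    }
}

fn main() {
    let a = Fields { e: [1.0, 0.0, 0.0], h: [0.0, 1.0, 0.0] };
    let b = Fields { e: [0.0, 2.0, 0.0], h: [0.0, 0.0, 3.0] };
    let sum = a.add(b);
    assert_eq!(sum.e, [1.0, 2.0, 0.0]);
    // an E-only stimulus: modulate H by zero, as FieldMags::new_e enables
    let e_only = sum.elem_mul(FieldMags { e: 1.0, h: 0.0 });
    assert_eq!(e_only.h, [0.0; 3]);
}
```

modulating against `FieldMags { e: 1.0, h: 0.0 }` is how an E-only stimulus is carved out of a combined field.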


@@ -0,0 +1,295 @@
use crate::cross::vec::Vec3;
use crate::geom::{Coord as _, Index, Meters};
use crate::real::Real;
use coremem_cross::dim::DimSlice;
use coremem_cross::vec::Vec3u;
use std::borrow::Cow;
use std::ops::Deref;
use rand;
mod fields;
mod time_varying;
mod vector_field;
pub use fields::{Fields, FieldMags};
pub use time_varying::{
Exp,
Gated,
Pulse,
Scaled,
Shifted,
Sinusoid,
Summed,
TimeVarying,
TimeVaryingExt,
UnitEH,
};
pub use vector_field::{
CurlVectorField,
RegionGated,
VectorField,
};
pub trait Stimulus<R: Real>: Sync {
/// Return the (E, H) fields which should be added *per second* at the provided position/time.
fn at(&self, t_sec: R, feat_size: R, loc: Index) -> Fields<R>;
/// compute the value of this stimulus across all the simulation space
fn rendered<'a>(
&'a self, scale: R, t_sec: R, feature_size: R, dim: Vec3u
) -> Cow<'a, RenderedStimulus<R>> {
Cow::Owned(render_stim(self, scale, t_sec, feature_size, dim))
}
}
fn render_stim<R: Real, S: Stimulus<R> + ?Sized>(
stim: &S, scale: R, t_sec: R, feature_size: R, dim: Vec3u
) -> RenderedStimulus<R> {
let dim_len = dim.product_sum_usize();
let mut e = Vec::new();
e.resize_with(dim_len, Default::default);
let mut h = Vec::new();
h.resize_with(dim_len, Default::default);
rayon::scope(|s| {
let mut undispatched_e = &mut e[..];
let mut undispatched_h = &mut h[..];
for z in 0..dim.z() {
for y in 0..dim.y() {
let (this_e, this_h);
(this_e, undispatched_e) = undispatched_e.split_at_mut(dim.x() as usize);
(this_h, undispatched_h) = undispatched_h.split_at_mut(dim.x() as usize);
s.spawn(move |_| {
for (x, (out_e, out_h)) in this_e.iter_mut().zip(this_h.iter_mut()).enumerate() {
let Fields { e, h } = stim.at(t_sec, feature_size, Index::new(x as u32, y, z));
*out_e = e * scale;
*out_h = h * scale;
}
});
}
}
});
let field_e = DimSlice::new(dim, e);
let field_h = DimSlice::new(dim, h);
RenderedStimulus::new(
field_e, field_h, scale, feature_size, t_sec
)
}
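`render_stim` parallelizes per row: it repeatedly carves `dim.x()` cells off the front of the flat `e`/`h` buffers via destructuring `split_at_mut`, then hands each disjoint row to its own rayon task. the same pattern can be sketched with only `std::thread::scope` (the fill expression here is a hypothetical stand-in for `stim.at(...)`):

```rust
// Row-splitting pattern from render_stim, with std::thread::scope in
// place of rayon::scope: carve disjoint mutable rows off one flat
// buffer, then fill each row from its own spawned task.
fn fill_rows(nx: usize, ny: usize) -> Vec<u32> {
    let mut field = vec![0u32; nx * ny];
    std::thread::scope(|s| {
        let mut rest = &mut field[..];
        for y in 0..ny {
            let row;
            // same destructuring-assignment trick as render_stim
            (row, rest) = rest.split_at_mut(nx);
            s.spawn(move || {
                for (x, cell) in row.iter_mut().enumerate() {
                    *cell = (y * nx + x) as u32; // stand-in for stim.at(t, feat, Index)
                }
            });
        }
    });
    // scope joins every task before returning, so the buffer is complete here
    field
}

fn main() {
    assert_eq!(fill_rows(4, 3), (0..12).collect::<Vec<u32>>());
}
```

each row is a disjoint `&mut` slice, so no locking is needed; the scope guarantees all tasks finish before `field` is read.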
#[derive(Clone)]
pub struct RenderedStimulus<R> {
e: DimSlice<Vec<Vec3<R>>>,
h: DimSlice<Vec<Vec3<R>>>,
scale: R,
feature_size: R,
t_sec: R,
}
impl<R> RenderedStimulus<R> {
pub fn new(
e: DimSlice<Vec<Vec3<R>>>,
h: DimSlice<Vec<Vec3<R>>>,
scale: R,
feature_size: R,
t_sec: R,
) -> Self {
Self { e, h, scale, feature_size, t_sec }
}
pub fn e<'a>(&'a self) -> DimSlice<&'a [Vec3<R>]> {
self.e.as_ref()
}
pub fn h<'a>(&'a self) -> DimSlice<&'a [Vec3<R>]> {
self.h.as_ref()
}
}
impl<R: Real> RenderedStimulus<R> {
pub fn scale(&self) -> R {
self.scale
}
pub fn feature_size(&self) -> R {
self.feature_size
}
pub fn time(&self) -> R {
self.t_sec
}
}
// TODO: is this necessary?
impl<R: Real> VectorField<R> for RenderedStimulus<R> {
fn at(&self, _feat_size: R, loc: Index) -> Fields<R> {
Fields::new_eh(self.e[loc.into()], self.h[loc.into()])
}
}
impl<R: Real> Stimulus<R> for RenderedStimulus<R> {
fn at(&self, _t_sec: R, _feat_size: R, loc: Index) -> Fields<R> {
Fields::new_eh(self.e[loc.into()], self.h[loc.into()])
}
fn rendered<'a>(
&'a self, scale: R, t_sec: R, feature_size: R, dim: Vec3u
) -> Cow<'a, RenderedStimulus<R>> {
if (self.scale, self.t_sec, self.feature_size, self.e.dim()) == (scale, t_sec, feature_size, dim) {
Cow::Borrowed(self)
} else {
Cow::Owned(render_stim(self, scale, t_sec, feature_size, dim))
}
}
}
impl<R: Real> Stimulus<R> for Fields<R> {
fn at(&self, _t_sec: R, _feat_size: R, _loc: Index) -> Fields<R> {
*self
}
}
/// a VectorField type whose amplitude is modulated by a TimeVarying component.
/// users will almost always use this as their stimulus implementation
pub struct ModulatedVectorField<V, T> {
fields: V,
modulation: T,
}
impl<V, T> ModulatedVectorField<V, T> {
pub fn new(fields: V, modulation: T) -> Self {
Self { fields, modulation }
}
pub fn into_inner(self) -> (V, T) {
(self.fields, self.modulation)
}
pub fn fields(&self) -> &V {
&self.fields
}
pub fn modulation(&self) -> &T {
&self.modulation
}
}
impl<R: Real, V: VectorField<R> + Sync, T: TimeVarying<R> + Sync> Stimulus<R> for ModulatedVectorField<V, T> {
fn at(&self, t_sec: R, feat_size: R, loc: Index) -> Fields<R> {
self.fields.at(feat_size, loc).elem_mul(self.modulation.at(t_sec))
}
}
// used as a MapVisitor in order to evaluate each Stimulus in a List at a specific time/place.
// struct StimulusEvaluator {
// fields: Fields,
// t_sec: f32,
// feat_size: f32,
// loc: Index,
// }
//
// impl<S: Stimulus> Visitor<&S> for &mut StimulusEvaluator {
// fn visit(&mut self, next: &S) {
// self.fields += next.at(self.t_sec, self.feat_size, self.loc);
// }
// }
//
// impl<L: Sync> Stimulus for L
// where
// for<'a, 'b> &'a L: Visit<&'b mut StimulusEvaluator>,
// {
// fn at(&self, t_sec: f32, pos: Meters) -> Fields {
// let mut ev = StimulusEvaluator { t_sec, pos, fields: Fields::default()};
// self.visit(&mut ev);
// ev.fields
// }
// }
// conflicts with List implementation
// impl<T: Stimulus> Stimulus for &T {
// fn at(&self, t_sec: f32, feat_size: f32, loc: Index) -> Fields {
// (*self).at(t_sec, feat_size, loc)
// }
// }
pub struct StimuliVec<S>(Vec<S>);
pub type DynStimuli<R> = StimuliVec<Box<dyn Stimulus<R> + Send>>;
impl<S> Default for StimuliVec<S> {
fn default() -> Self {
Self(Vec::new())
}
}
impl<S> Deref for StimuliVec<S> {
type Target = Vec<S>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl<S> StimuliVec<S> {
pub fn new() -> Self {
Self::default()
}
pub fn from_vec(stim: Vec<S>) -> Self {
Self(stim)
}
pub fn push(&mut self, a: S) {
self.0.push(a)
}
}
impl<R: Real, S: Stimulus<R>> Stimulus<R> for StimuliVec<S> {
fn at(&self, t_sec: R, feat_size: R, loc: Index) -> Fields<R> {
self.0.iter().map(|i| i.at(t_sec, feat_size, loc))
.fold(Fields::default(), core::ops::Add::add)
}
}
impl<R: Real> Stimulus<R> for Box<dyn Stimulus<R> + Send> {
fn at(&self, t_sec: R, feat_size: R, loc: Index) -> Fields<R> {
(**self).at(t_sec, feat_size, loc)
}
}
pub struct NoopStimulus;
impl<R: Real> Stimulus<R> for NoopStimulus {
fn at(&self, _t_sec: R, _feat_size: R, _loc: Index) -> Fields<R> {
Fields::default()
}
}
pub struct RngStimulus {
seed: u64,
e_scale: f32,
h_scale: f32,
}
impl RngStimulus {
pub fn new(seed: u64) -> Self {
Self { seed, e_scale: 1e15, h_scale: 1e15 }
}
pub fn new_e(seed: u64) -> Self {
Self { seed, e_scale: 1e15, h_scale: 0.0 }
}
fn gen(&self, t_sec: f32, pos: Meters, scale: f32, salt: u64) -> Vec3<f32> {
use rand::{Rng as _, SeedableRng as _};
let seed = self.seed
^ (t_sec.to_bits() as u64)
^ ((pos.x().to_bits() as u64) << 8)
^ ((pos.y().to_bits() as u64) << 16)
^ ((pos.z().to_bits() as u64) << 24)
^ (salt << 32);
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
Vec3::new(
rng.gen_range(-scale..=scale),
rng.gen_range(-scale..=scale),
rng.gen_range(-scale..=scale),
)
}
}
impl<R: Real> Stimulus<R> for RngStimulus {
fn at(&self, t_sec: R, feat_size: R, loc: Index) -> Fields<R> {
Fields {
e: self.gen(t_sec.cast(), loc.to_meters(feat_size.cast()), self.e_scale, 0).cast(),
h: self.gen(t_sec.cast(), loc.to_meters(feat_size.cast()), self.h_scale, 0x7de3).cast(),
}
}
}
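`RngStimulus` is reproducible because the RNG seed is derived purely from (base seed, `t_sec` bits, position bits): re-querying the same point in space-time always yields the same "random" vector. a standalone sketch of that idea (the splitmix64-style mixer below is an illustrative stand-in for `StdRng::seed_from_u64` + `gen_range`, not the crate's code):

```rust
// Deterministic noise: mix time and position bits into the seed, so the
// same (t, x) query always produces the same output with no stored state.
fn noise(seed: u64, t_sec: f32, x: f32) -> u64 {
    let mixed = seed ^ (t_sec.to_bits() as u64) ^ ((x.to_bits() as u64) << 8);
    // splitmix64-style finalizer: a composition of bijective steps, so
    // distinct mixed seeds always map to distinct outputs
    let mut s = mixed.wrapping_add(0x9E37_79B9_7F4A_7C15);
    s ^= s >> 30;
    s = s.wrapping_mul(0xBF58_476D_1CE4_E5B9);
    s ^ (s >> 27)
}

fn main() {
    assert_eq!(noise(1, 0.5, 2.0), noise(1, 0.5, 2.0)); // deterministic
    assert_ne!(noise(1, 0.5, 2.0), noise(1, 0.5, 3.0)); // position-sensitive
}
```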


@@ -0,0 +1,252 @@
//! time-varying portions of a Stimulus
use crate::real::{self, Real};
use crate::stim::FieldMags;
pub trait TimeVarying<R> {
fn at(&self, t_sec: R) -> FieldMags<R>;
}
pub trait TimeVaryingExt<R>: Sized {
fn shifted(self, new_start: R) -> Shifted<R, Self> {
Shifted::new(self, new_start)
}
fn gated(self, from: R, to: R) -> Gated<R, Self> {
Gated::new(Pulse::new(from, to), self)
}
fn scaled<T: TimeVarying<R>>(self, scale: T) -> Scaled<Self, T> {
Scaled::new(self, scale)
}
fn summed<T: TimeVarying<R>>(self, with: T) -> Summed<Self, T> {
Summed::new(self, with)
}
}
impl<R, T> TimeVaryingExt<R> for T {}
impl<R: Real> TimeVarying<R> for FieldMags<R> {
fn at(&self, _t_sec: R) -> FieldMags<R> {
*self
}
}
// assumed to represent the E field
impl<R: Real> TimeVarying<real::Finite<R>> for real::Finite<R> {
fn at(&self, _t_sec: real::Finite<R>) -> FieldMags<real::Finite<R>> {
FieldMags::new_e(*self)
}
}
impl TimeVarying<f32> for f32 {
fn at(&self, _t_sec: f32) -> FieldMags<f32> {
FieldMags::new_e(*self)
}
}
impl TimeVarying<f64> for f64 {
fn at(&self, _t_sec: f64) -> FieldMags<f64> {
FieldMags::new_e(*self)
}
}
// Vec<T> at any `t_sec` behaves as the sum of all its components at that time.
impl<R: Real, T: TimeVarying<R>> TimeVarying<R> for Vec<T> {
fn at(&self, t_sec: R) -> FieldMags<R> {
self.iter().fold(FieldMags::default(), |acc, i| acc + i.at(t_sec))
}
}
pub struct UnitEH;
impl<R: Real> TimeVarying<R> for UnitEH {
fn at(&self, _t_sec: R) -> FieldMags<R> {
FieldMags::new_eh(R::one(), R::one())
}
}
/// E and H magnitudes which vary sinusoidally as a function of t
#[derive(Clone)]
pub struct Sinusoid<R> {
omega: R,
}
impl<R: Real> Sinusoid<R> {
pub fn new(freq: R) -> Self {
Self {
omega: freq * R::two_pi(),
}
}
pub fn from_wavelength(lambda: R) -> Self {
Self::new(lambda.inv())
}
pub fn freq(&self) -> R {
self.omega / R::two_pi()
}
pub fn wavelength(&self) -> R {
self.freq().inv()
}
pub fn one_cycle(self) -> Gated<R, Self> {
let wl = self.wavelength();
self.gated(R::zero(), wl)
}
pub fn half_cycle(self) -> Gated<R, Self> {
let wl = self.wavelength();
self.gated(R::zero(), R::half() * wl)
}
}
impl<R: Real> TimeVarying<R> for Sinusoid<R> {
fn at(&self, t_sec: R) -> FieldMags<R> {
let v = (t_sec * self.omega).sin();
FieldMags::new_eh(v, v)
}
}
/// E and H magnitudes that decay exponentially over t.
/// zero for all t < 0
#[derive(Clone)]
pub struct Exp<R> {
tau: R,
}
impl<R: Real + TimeVarying<R>> Exp<R> {
pub fn new(half_life: R) -> Self {
let tau = R::ln2()/half_life;
Self { tau }
}
pub fn new_at(amp: R, start: R, half_life: R) -> Shifted<R, Gated<R, Scaled<Self, R>>> {
Self::new(half_life).scaled(amp).gated(R::zero(), half_life*100f32.cast::<R>()).shifted(start)
}
}
impl<R: Real> TimeVarying<R> for Exp<R> {
fn at(&self, t_sec: R) -> FieldMags<R> {
let a = if t_sec < R::zero() {
// queries for very negative `t_sec` could cause `a` to explode
// and IEEE 754 makes exp(LARGE) be infinity.
// later, these queries are gated with a multiply-by-zero,
// but 0 times INF is NaN.
// so make this zero-valued before the moment of interest.
// (an alternative would be to set it to 1.0).
R::zero()
} else {
(t_sec * -self.tau).exp()
};
FieldMags::new_eh(a, a)
}
}
/// pulses E=1.0 and H=1.0 over the provided duration.
/// this is used as a building block to gate some VectorField over a specific time.
#[derive(Clone)]
pub struct Pulse<R> {
start: R,
end: R,
}
impl<R> Pulse<R> {
pub fn new(start: R, end: R) -> Self {
Self { start, end }
}
}
impl<R: Real> Pulse<R> {
fn contains(&self, t: R) -> bool {
t >= self.start && t < self.end
}
}
impl<R: Real> TimeVarying<R> for Pulse<R> {
fn at(&self, t: R) -> FieldMags<R> {
if self.contains(t) {
FieldMags::new_eh(R::one(), R::one())
} else {
FieldMags::new_eh(R::zero(), R::zero())
}
}
}
pub type Gated<R, T> = Scaled<Pulse<R>, T>;
#[derive(Clone)]
pub struct Shifted<R, T> {
start_at: R,
inner: T,
}
impl<R, T> Shifted<R, T> {
pub fn new(inner: T, start_at: R) -> Self {
Self { inner, start_at }
}
}
impl<R: Real, T: TimeVarying<R>> TimeVarying<R> for Shifted<R, T> {
fn at(&self, t_sec: R) -> FieldMags<R> {
self.inner.at(t_sec - self.start_at)
}
}
#[derive(Clone)]
pub struct Scaled<A, B>(A, B);
impl<A, B> Scaled<A, B> {
pub fn new(a: A, b: B) -> Self {
Self(a, b)
}
}
impl<R: Real, A: TimeVarying<R>, B: TimeVarying<R>> TimeVarying<R> for Scaled<A, B> {
fn at(&self, t_sec: R) -> FieldMags<R> {
self.0.at(t_sec).elem_mul(self.1.at(t_sec))
}
}
#[derive(Clone)]
pub struct Summed<A, B>(A, B);
impl<A, B> Summed<A, B> {
pub fn new(a: A, b: B) -> Self {
Self(a, b)
}
}
impl<R: Real, A: TimeVarying<R>, B: TimeVarying<R>> TimeVarying<R> for Summed<A, B> {
fn at(&self, t_sec: R) -> FieldMags<R> {
self.0.at(t_sec) + self.1.at(t_sec)
}
}
#[cfg(test)]
mod test {
use super::*;
macro_rules! assert_approx_eq {
($x:expr, $e:expr, $h:expr) => {
let x = $x;
let e = $e;
let h = $h;
let diff_e = (x.e - e).abs();
assert!(diff_e <= 0.001, "{:?} != {:?}", x, e);
let diff_h = (x.h - h).abs();
assert!(diff_h <= 0.001, "{:?} != {:?}", x, h);
}
}
#[test]
fn sinusoid() {
let s = Sinusoid::new(1000.0);
assert_eq!(s.at(0.0), FieldMags::default());
assert_approx_eq!(s.at(0.00025), 1.0, 1.0);
assert_approx_eq!(s.at(0.00050), 0.0, 0.0);
assert_approx_eq!(s.at(0.00075), -1.0, -1.0);
}
#[test]
fn sinusoid_from_wavelength() {
let s = Sinusoid::from_wavelength(0.001);
assert_eq!(s.at(0.0), FieldMags::default());
assert_approx_eq!(s.at(0.00025), 1.0, 1.0);
assert_approx_eq!(s.at(0.00050), 0.0, 0.0);
assert_approx_eq!(s.at(0.00075), -1.0, -1.0);
}
}
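the clamp in `Exp::at` deserves a concrete demonstration: gating by multiplying with 0.0 only masks a signal if the inner value is finite, and f32 overflow to infinity turns `0.0 * x` into NaN. a self-contained check of exactly that failure mode (`exp_clamped` is an illustrative stand-alone version of the clamp, not the crate's generic code):

```rust
// Why Exp::at returns zero for t < 0: for very negative t the unclamped
// exponent overflows to +inf, and a downstream multiply-by-zero gate
// then produces NaN instead of the intended 0.0.
fn exp_clamped(t: f32, tau: f32) -> f32 {
    if t < 0.0 { 0.0 } else { (t * -tau).exp() }
}

fn main() {
    let (t, tau) = (-1.0e6f32, 1.0e3f32);
    let unclamped = (t * -tau).exp(); // exp(1e9): overflows to +inf in f32
    assert!(unclamped.is_infinite());
    assert!((0.0 * unclamped).is_nan()); // the zero-valued gate fails to mask it
    assert_eq!(0.0 * exp_clamped(t, tau), 0.0); // the clamp keeps gating sound
}
```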


@@ -0,0 +1,136 @@
use crate::geom::{Coord as _, HasCrossSection, Index, Region};
use crate::real::Real;
use crate::stim::Fields;
use coremem_cross::dim::DimSlice;
use coremem_cross::vec::Vec3u;
/// a static vector field. different value at each location, but constant in time.
/// often used as a building block by wrapping it in something which modulates the fields over
/// time.
pub trait VectorField<R> {
fn at(&self, feat_size: R, loc: Index) -> Fields<R>;
}
// a vec of VectorFields is the sum of those fields
impl<R: Real, V: VectorField<R>> VectorField<R> for Vec<V> {
fn at(&self, feat_size: R, loc: Index) -> Fields<R> {
let mut acc = Fields::default();
for v in self {
acc += v.at(feat_size, loc);
}
acc
}
}
// uniform vector field
impl<R: Real> VectorField<R> for Fields<R> {
fn at(&self, _feat_size: R, _loc: Index) -> Fields<R> {
*self
}
}
// could broaden this and implement directly on T, but blanket impls
// are unwieldy
impl<R: Real, T> VectorField<R> for DimSlice<T>
where
DimSlice<T>: core::ops::Index<Vec3u, Output=Fields<R>>
{
fn at(&self, _feat_size: R, loc: Index) -> Fields<R> {
self[loc.into()]
}
}
/// restrict the VectorField to just the specified region, letting it be zero everywhere else
#[derive(Clone)]
pub struct RegionGated<G, V> {
region: G,
field: V,
}
impl<G, V> RegionGated<G, V> {
pub fn new(region: G, field: V) -> Self {
Self {
region, field
}
}
}
impl<R: Real, G: Region + Sync, V: VectorField<R>> VectorField<R> for RegionGated<G, V> {
fn at(&self, feat_size: R, loc: Index) -> Fields<R> {
if self.region.contains(loc.to_meters(feat_size.cast())) {
self.field.at(feat_size, loc)
} else {
Fields::default()
}
}
}
/// VectorField whose field at each point is based on its angle about the specified ray.
/// the field has equal E and H vectors. if you want just one, zero the other out with `Scaled`.
#[derive(Clone)]
pub struct CurlVectorField<G> {
region: G,
}
impl<G> CurlVectorField<G> {
pub fn new(region: G) -> Self {
Self { region }
}
}
impl<R: Real, G: Region + HasCrossSection> VectorField<R> for CurlVectorField<G> {
fn at(&self, feat_size: R, loc: Index) -> Fields<R> {
let pos = loc.to_meters(feat_size.cast());
if self.region.contains(pos) {
// TODO: do we *want* this to be normalized?
let rotational = self.region.cross_section_normal(pos).norm().cast();
Fields::new_eh(rotational, rotational)
} else {
Fields::default()
}
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::cross::vec::Vec3;
use crate::geom::Meters;
struct MockRegion {
normal: Vec3<f32>,
}
impl HasCrossSection for MockRegion {
fn cross_section_normal(&self, _p: Meters) -> Vec3<f32> {
self.normal
}
}
impl Region for MockRegion {
fn contains(&self, _p: Meters) -> bool {
true
}
}
#[test]
fn curl_stimulus_trivial() {
let region = MockRegion {
normal: Vec3::new(1.0, 0.0, 0.0)
};
let stim = CurlVectorField::new(region);
assert_eq!(stim.at(1.0, Index::new(0, 0, 0)), Fields {
e: Vec3::new(1.0, 0.0, 0.0),
h: Vec3::new(1.0, 0.0, 0.0),
});
}
#[test]
fn curl_stimulus_multi_axis() {
let region = MockRegion {
normal: Vec3::new(0.0, -1.0, 1.0)
};
let stim = CurlVectorField::new(region);
let Fields { e, h } = stim.at(1.0, Index::new(0, 0, 0));
assert_eq!(e, h);
assert!(e.distance(Vec3::new(0.0, -1.0, 1.0).norm()) < 1e-6);
}
}


@@ -1,74 +0,0 @@
use log::trace;
use serde::{de::DeserializeOwned, Serialize};
pub struct NoSupplier;
pub struct DiskCache<K, V, S=NoSupplier> {
path: String,
entries: Vec<(K, V)>,
supplier: S,
}
impl<K: DeserializeOwned, V: DeserializeOwned> DiskCache<K, V, NoSupplier> {
pub fn new(path: &str) -> Self {
Self::new_with_supplier(path, NoSupplier)
}
}
impl<K: DeserializeOwned, V: DeserializeOwned, S> DiskCache<K, V, S> {
pub fn new_with_supplier(path: &str, supplier: S) -> Self {
let entries = Self::load_from_disk(path).unwrap_or_default();
Self {
path: path.into(),
entries,
supplier,
}
}
fn load_from_disk(path: &str) -> Option<Vec<(K, V)>> {
let reader = std::io::BufReader::new(std::fs::File::open(path).ok()?);
bincode::deserialize_from(reader).ok()
}
}
impl<K: PartialEq, V, S> DiskCache<K, V, S> {
pub fn get(&self, k: &K) -> Option<&V> {
self.entries.iter().find(|(comp_k, _v): &&(K, V)| comp_k == k).map(|(_k, v)| v)
}
}
impl<K: Serialize, V: Serialize, S> DiskCache<K, V, S> {
pub fn insert(&mut self, k: K, v: V) {
self.entries.push((k, v));
self.flush();
}
fn flush(&self) {
let writer = std::io::BufWriter::new(std::fs::File::create(&self.path).unwrap());
bincode::serialize_into(writer, &self.entries).unwrap();
}
}
impl<K: PartialEq + Serialize, V: Serialize + Clone, S> DiskCache<K, V, S> {
pub fn get_or_insert_with<F: FnOnce() -> V>(&mut self, k: K, f: F) -> V {
if let Some(v) = self.get(&k) {
return v.clone();
}
let v = f();
self.insert(k, v.clone());
v
}
}
impl<K: PartialEq + Serialize, V: Serialize + Clone, S: FnMut(&K) -> V> DiskCache<K, V, S> {
pub fn get_or_insert_from_supplier(&mut self, k: K) -> V {
if let Some(v) = self.get(&k) {
trace!("get_or_insert_from_supplier hit");
return v.clone();
}
trace!("get_or_insert_from_supplier miss");
let v = (self.supplier)(&k);
self.insert(k, v.clone());
v
}
}


@@ -1 +0,0 @@
pub mod cache;


@@ -0,0 +1,216 @@
//! consumer/producer primitives
use crossbeam::channel::{self, Receiver, Sender};
pub struct JobPool<C, R> {
command_chan: Sender<C>,
response_chan: Receiver<R>,
worker_command_chan: Receiver<C>,
worker_response_chan: Sender<R>,
handles: Vec<std::thread::JoinHandle<()>>,
}
struct Worker<C, R, W> {
command_chan: Receiver<C>,
response_chan: Sender<R>,
work_fn: W,
}
impl<C, R, W: Clone> Clone for Worker<C, R, W> {
fn clone(&self) -> Self {
Self {
command_chan: self.command_chan.clone(),
response_chan: self.response_chan.clone(),
work_fn: self.work_fn.clone(),
}
}
}
impl<C, R: Send, W: Fn(C) -> R> Worker<C, R, W> {
fn to_completion(self) {
for cmd in &self.command_chan {
let resp = (self.work_fn)(cmd);
let _ = self.response_chan.send(resp);
}
}
}
impl<C, R> JobPool<C, R> {
pub fn new(buffer: usize) -> Self {
let (cmd_send, cmd_recv) = channel::bounded(buffer);
let (resp_send, resp_recv) = channel::bounded(buffer);
Self {
command_chan: cmd_send,
response_chan: resp_recv,
worker_command_chan: cmd_recv,
worker_response_chan: resp_send,
handles: vec![],
}
}
pub fn num_workers(&self) -> u32 {
self.handles.len().try_into().unwrap()
}
pub fn recv(&self) -> R {
self.response_chan.recv().unwrap()
}
/// non-blocking `try_recv`. named `tend` because it's typically called to ensure no worker is
/// blocked for lack of space in the output queue.
pub fn tend(&self) -> Option<R> {
self.response_chan.try_recv().ok()
}
pub fn join_workers(&mut self) {
// hang up the sender, to signal workers to exit.
let cap = self.command_chan.capacity().unwrap_or(0);
(self.command_chan, self.worker_command_chan) = channel::bounded(cap);
(self.worker_response_chan, self.response_chan) = channel::bounded(cap);
for h in self.handles.drain(..) {
h.join().unwrap();
}
}
}
impl<C: Send + 'static, R: Send + 'static> JobPool<C, R> {
pub fn spawn_workers<W: Fn(C) -> R + Send + Clone + 'static>(&mut self, n: u32, work_fn: W) {
for _ in 0..n {
self.spawn_worker(work_fn.clone());
}
}
}
impl<C: Send + 'static, R: Send + 'static> JobPool<C, R> {
pub fn spawn_worker<W: Fn(C) -> R + Send + 'static>(&mut self, work_fn: W) {
let worker = Worker {
command_chan: self.worker_command_chan.clone(),
response_chan: self.worker_response_chan.clone(),
work_fn,
};
self.handles.push(std::thread::spawn(move || {
worker.to_completion()
}));
}
}
impl<C, R> Drop for JobPool<C, R> {
fn drop(&mut self) {
self.join_workers();
}
}
impl<C: Send + 'static, R> JobPool<C, R> {
pub fn send(&self, cmd: C) {
self.command_chan.send(cmd).unwrap();
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn lifecycle_no_workers() {
let _pool: JobPool<(), ()> = JobPool::new(0);
}
#[test]
fn lifecycle_some_workers() {
let mut pool: JobPool<(), ()> = JobPool::new(0);
pool.spawn_worker(|_| ());
pool.spawn_workers(2, |_| ());
}
#[test]
fn single_worker() {
let mut pool: JobPool<u32, u32> = JobPool::new(0);
pool.spawn_worker(|x| x*2);
pool.send(5);
assert_eq!(pool.recv(), 10);
pool.send(4);
assert_eq!(pool.recv(), 8);
}
#[test]
fn multi_worker() {
use std::sync::{Arc, Mutex};
let mutex = Arc::new(Mutex::new(()));
let worker_mutex = mutex.clone();
let mut pool: JobPool<u32, u32> = JobPool::new(0);
pool.spawn_workers(2, move |x| {
// wait until caller unlocks us
let _lock = worker_mutex.lock().unwrap();
x*2
});
pool.send(1);
assert_eq!(pool.recv(), 2);
{
let _lock = mutex.lock().unwrap();
pool.send(4);
pool.send(5); // shouldn't block
}
let mut replies = [pool.recv(), pool.recv()];
replies.sort();
assert_eq!(replies, [8, 10]);
}
#[test]
fn exit_with_unclaimed_responses() {
let mut pool: JobPool<u32, u32> = JobPool::new(0);
pool.spawn_workers(2, |x| x*2);
pool.send(5);
pool.send(6);
}
#[test]
fn num_workers() {
let mut pool: JobPool<u32, u32> = JobPool::new(0);
assert_eq!(pool.num_workers(), 0);
pool.spawn_workers(2, |x| x*2);
assert_eq!(pool.num_workers(), 2);
pool.spawn_workers(1, |x| x*2);
assert_eq!(pool.num_workers(), 3);
pool.send(5);
pool.send(6);
assert_eq!(pool.num_workers(), 3);
pool.recv();
pool.recv();
assert_eq!(pool.num_workers(), 3);
}
#[test]
fn test_bounded() {
let pool: JobPool<u32, u32> = JobPool::new(2);
// we can do this without blocking even when there are no consumers
// because it just gets buffered
pool.send(1);
pool.send(2);
}
#[test]
fn join_workers() {
let mut pool: JobPool<u32, u32> = JobPool::new(1);
pool.spawn_workers(2, |x| x*2);
pool.send(5);
pool.join_workers();
pool.spawn_workers(2, |x| x*2);
pool.send(4);
// the earlier response to '5' should be lost in the channel
assert_eq!(pool.recv(), 8);
// one message in the response queue; one in the send queue, 2 in the worker threads
pool.send(3); pool.send(2); pool.send(1); pool.send(0);
// should still be able to join even though everyone's blocked.
pool.join_workers();
pool.spawn_workers(1, |x| x*2);
pool.send(7);
// the old '0' command should be lost in the channel
assert_eq!(pool.recv(), 14);
}
}
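`JobPool` fans commands out over a cloneable crossbeam channel and funnels responses back over another; `join_workers` signals exit by replacing (and thereby dropping) the pool's channel ends. the shape of the pattern, sketched with std-only primitives (std's `mpsc::Receiver` is not `Clone`, hence the `Arc<Mutex<..>>` work queue; `run_pool` is an illustrative helper, not the crate's API):

```rust
use std::sync::{mpsc, Arc, Mutex};
use std::thread;

// Doubles each input on one of `n` workers; the sorted results show that
// all commands were processed even though ordering across workers varies.
fn run_pool(n: usize, inputs: &[u32]) -> Vec<u32> {
    let (cmd_tx, cmd_rx) = mpsc::channel::<u32>();
    let (resp_tx, resp_rx) = mpsc::channel::<u32>();
    let cmd_rx = Arc::new(Mutex::new(cmd_rx));
    let mut handles = Vec::new();
    for _ in 0..n {
        let rx = Arc::clone(&cmd_rx);
        let tx = resp_tx.clone();
        handles.push(thread::spawn(move || loop {
            let cmd = match rx.lock().unwrap().recv() {
                Ok(c) => c,
                Err(_) => break, // all senders hung up: worker exits
            };
            tx.send(cmd * 2).unwrap();
        }));
    }
    drop(resp_tx); // keep only the workers' response senders alive
    for &i in inputs { cmd_tx.send(i).unwrap(); }
    drop(cmd_tx); // hang up, as JobPool::join_workers does by swapping channels
    let mut out: Vec<u32> = resp_rx.iter().collect();
    for h in handles { h.join().unwrap(); }
    out.sort();
    out
}

fn main() {
    assert_eq!(run_pool(2, &[5, 4, 3]), vec![6, 8, 10]);
}
```

dropping every command sender is the shutdown signal: each worker's `recv` returns `Err`, so the loop ends and the thread can be joined.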

crates/cross/Cargo.toml

@@ -0,0 +1,19 @@
[package]
name = "coremem_cross"
version = "0.2.0"
authors = ["Colin <colin@uninsane.org>"]
edition = "2021"
[features]
# some functionality does not compile for the spirv target, so we feature gate these.
serde = [ "dep:serde" ]
fmt = []
iter = []
std = []
[dependencies]
serde = { version = "1.0", optional = true } # MIT or Apache 2.0
[dev-dependencies]
coremem_cross = { path = ".", default-features = false, features = ["iter", "fmt", "std"] }
float_eq = "1.0" # MIT or Apache 2.0


@@ -0,0 +1,388 @@
use crate::compound::peano::{P0, Peano, PNext};
use crate::compound::list::{self, Indexable, IntoList, List};
// TODO: we can probably simplify a lot of this by using the newer List traits
// for example:
// - FoldOp<CallOn<P0>, V> calls the user function on the next item fed.
// - FoldOp<CallOn<PNext<P>>, V> returns CallOn<P> as the next state.
// - therefore we have a way to invoke a function on an arbitrary index for any list which is : Fold<CallOn<P>>
// - use DiscrDispatch to instantiate the right CallOn<P>
//
// doing this well will require benchmarking before/after
#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};
/// implement for something which supports being called for this specific variant
pub trait Visitor<N: Peano, Arg, Output> {
fn call(self, a: Arg) -> Output;
}
/// anything which can encode a discriminant up to *but not including* P
pub trait DiscriminantCodable<P: Peano>: Sized {
fn decode_discr(&self) -> Discr<P>;
fn encode_discr(d: Discr<P>) -> Self;
fn set_discr(&mut self, d: Discr<P>) {
*self = Self::encode_discr(d)
}
}
/// discriminant which encodes up to *but not including* P.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Copy, Clone, Default, PartialEq)]
pub struct Discr<P: Peano>(u32, P::Unit);
impl<P: Peano> DiscriminantCodable<P> for Discr<P> {
fn decode_discr(&self) -> Self {
*self
}
fn encode_discr(d: Discr<P>) -> Self {
d
}
}
impl<P: Peano> Discr<P> {
pub fn new(u: u32) -> Self {
assert!(u < P::VALUE);
Self::new_unchecked(u)
}
pub fn value(&self) -> u32 {
self.0
}
fn new_unchecked(u: u32) -> Self {
Self(u, Default::default())
}
}
/// given some Discr<P>, which is a runtime integer < P, invoke the handler H as H<N> where N is
/// the Peano number equivalent to the runtime integer encoded by the discriminant.
///
/// this is just how we dispatch a bounded discriminant while ensuring the receiver can handle
/// every value we might call it with.
pub trait DiscrDispatch<P: Peano> {
fn dispatch<H: DiscrHandler<P, Output>, Output>(&self, h: H) -> Output;
}
impl<P: Peano> DiscrDispatch<PNext<P>> for Discr<PNext<P>>
where
Discr<P>: DiscrDispatch<P>
{
fn dispatch<H: DiscrHandler<PNext<P>, O>, O>(&self, h: H) -> O {
match self.value() {
// consider P=2: we want to dispatch values of 0 and 1, but handle 2:
// so dispatch v < P
v if v < P::VALUE => Discr::<P>::new_unchecked(v).dispatch(h.prev()),
v if v == P::VALUE => h.call(),
_ => unreachable!(),
}
}
}
impl DiscrDispatch<P0> for Discr<P0> {
fn dispatch<H: DiscrHandler<P0, O>, O>(&self, _h: H) -> O {
unreachable!()
}
}
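the `Discr<P>` bound (`u < P::VALUE`) relies on type-level Peano numbers that also carry a runtime value. stripped to its core, the encoding looks like this (a standalone sketch; `Z`/`S` are illustrative names standing in for the crate's `P0`/`PNext`):

```rust
use std::marker::PhantomData;

// Type-level Peano numbers: Z is zero, S<P> is P + 1.
struct Z;
struct S<P>(PhantomData<P>);

// Each type-level number also exposes its runtime value, which is what
// lets Discr assert `u < P::VALUE` against a type parameter.
trait Value { const VALUE: u32; }
impl Value for Z { const VALUE: u32 = 0; }
impl<P: Value> Value for S<P> { const VALUE: u32 = P::VALUE + 1; }

fn main() {
    assert_eq!(<Z as Value>::VALUE, 0);
    assert_eq!(<S<S<S<Z>>> as Value>::VALUE, 3);
}
```

the dispatch impls above then recurse on this structure: the `PNext<P>` case peels one value off at a time, and the `P0` case is unreachable because no discriminant is ever below zero.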
/// something which can be called with any value up to (but not including) N
pub trait DiscrHandler<N: Peano, R> {
type PrevOrPanic: DiscrHandler<N::PrevOrZero, R>;
/// called when the discriminant has value N-1
fn call(self) -> R;
/// discriminant is < N-1: dispatch to the next handler.
/// in the case that N = 1, this path *should* be unreachable,
/// so a panic would be allowed.
fn prev(self) -> Self::PrevOrPanic;
}
/// helper used to call F with some (yet-to-be-determined) index of I
pub struct DispatchIndexable<I, F> {
indexable: I,
f: F,
}
impl<I, F> DispatchIndexable<I, F> {
fn new(indexable: I, f: F) -> Self {
Self { indexable, f }
}
}
// base case: we tried to index all cases >= 0 and failed.
// this should be unreachable.
impl<I, F, R> DiscrHandler<P0, R> for DispatchIndexable<I, F>
{
type PrevOrPanic = Self;
fn call(self) -> R {
unreachable!()
}
fn prev(self) -> Self::PrevOrPanic {
unreachable!()
}
}
// inductive case: if we know how to dispatch up through P, and the collection is P+1-indexable,
// then we can index up through P+1
impl<'a, I, F, P: Peano, R> DiscrHandler<PNext<P>, R> for DispatchIndexable<&'a I, F>
where
I: Indexable<P>,
I::Element: Copy,
F: Visitor<P, I::Element, R>,
Self: DiscrHandler<P, R>,
{
type PrevOrPanic = Self;
fn call(self) -> R {
self.f.call(self.indexable.get())
}
fn prev(self) -> Self::PrevOrPanic {
self
}
}
// mutable indexing case: if we have a mutable handle to the Indexable,
// then assume the variants want to have mutable references to the items.
impl<'a, I, F, P: Peano, R> DiscrHandler<PNext<P>, R> for DispatchIndexable<&'a mut I, F>
where
I: Indexable<P>,
I::Element: 'a,
F: Visitor<P, &'a mut I::Element, R>,
Self: DiscrHandler<P, R>,
{
type PrevOrPanic = Self;
fn call(self) -> R {
self.f.call(self.indexable.get_mut())
}
fn prev(self) -> Self::PrevOrPanic {
self
}
}
/// discriminated enum. D encodes the discriminant, while L encodes the data. if D is the unit
/// tuple, the discriminant is assumed to be encoded in the data itself (see
/// `InternallyDiscriminated`)
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Copy, Clone, Default, PartialEq)]
pub struct Enum<D, L>(D, L);
/// Users should prefer this type rather than rely on the internal Enum struct implementation.
#[allow(dead_code)]
pub type InternallyDiscriminated<Args> = Enum<(), List<Args>>;
impl<P: Peano, L> Enum<(Discr<P>,), L> {
#![allow(dead_code)]
pub fn new<Variants>(v: Variants) -> Self
where
Variants: IntoList<List=L>,
L: list::Meta<Length=P>,
{
Enum((Discr::default(),), v.into_list())
}
}
impl<L> Enum<(), L> {
#![allow(dead_code)]
pub fn internally_discriminated<Variants>(v: Variants) -> Self
where
Variants: IntoList<List=L>,
{
Enum((), v.into_list())
}
}
pub trait EnumRequirements {
type NumVariants: Peano;
fn decode_discr(&self) -> Discr<Self::NumVariants>;
fn encode_discr(&mut self, d: Discr<Self::NumVariants>);
}
// externally discriminated
impl<D, L> EnumRequirements for Enum<(D,), L>
where
D: DiscriminantCodable<<L as list::Meta>::Length>,
L: list::Meta,
{
type NumVariants = <L as list::Meta>::Length;
fn decode_discr(&self) -> Discr<Self::NumVariants> {
self.0.0.decode_discr()
}
fn encode_discr(&mut self, d: Discr<Self::NumVariants>) {
self.0.0.set_discr(d)
}
}
// internally discriminated
impl<L> EnumRequirements for Enum<(), L>
where
L: list::Meta + Indexable<P0>,
list::ElementAt<P0, L>: DiscriminantCodable<<L as list::Meta>::Length>,
{
type NumVariants = <L as list::Meta>::Length;
fn decode_discr(&self) -> Discr<Self::NumVariants> {
self.1.get_ref().decode_discr()
}
fn encode_discr(&mut self, d: Discr<Self::NumVariants>) {
self.1.get_mut().set_discr(d)
}
}
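The two impls above source the discriminant either from the dedicated `D` field (external) or from the first payload element (internal). A standalone sketch of the internal scheme, mirroring the `BoxedF32` type used in the tests below — the numbers here are illustrative, not part of this crate's API:

```rust
// internally-discriminated payload: the first field doubles as the tag.
#[derive(Copy, Clone, Debug, PartialEq)]
struct Tagged(f32);

impl Tagged {
    // non-negative payloads belong to variant 0; a payload of -n encodes variant n
    fn decode_discr(self) -> u32 {
        if self.0 < 0.0 { (-self.0) as u32 } else { 0 }
    }
    fn encode_discr(d: u32) -> Self {
        Tagged(-(d as f32))
    }
}
```

The trade-off: no extra storage for the tag, at the cost of carving the discriminant out of variant 0's value space.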
impl<D, L> Enum<D, L>
where
Self: EnumRequirements
{
/// invoke the closure on the active variant, passing the variant by-value
pub fn dispatch<'a, F, R>(&'a self, f: F) -> R
where
DispatchIndexable<&'a L, F>: DiscrHandler<<Self as EnumRequirements>::NumVariants, R>,
// TODO: this trait bound shouldn't be necessary. Discr ALWAYS implements DiscrDispatch
Discr<<Self as EnumRequirements>::NumVariants>: DiscrDispatch<<Self as EnumRequirements>::NumVariants>,
{
self.decode_discr().dispatch(DispatchIndexable::new(&self.1, f))
}
/// invoke the closure on the active variant, passing the variant by mutable reference
#[allow(dead_code)]
pub fn dispatch_mut<'a, F, R>(&'a mut self, f: F) -> R
where
DispatchIndexable<&'a mut L, F>: DiscrHandler<<Self as EnumRequirements>::NumVariants, R>,
Discr<<Self as EnumRequirements>::NumVariants>: DiscrDispatch<<Self as EnumRequirements>::NumVariants>,
{
self.decode_discr().dispatch(DispatchIndexable::new(&mut self.1, f))
}
/// assign the enum to the variant `P` with value `value`.
pub fn set<P>(&mut self, value: L::Element)
where
P: Peano,
L: Indexable<P>,
{
self.encode_discr(Discr::new(P::VALUE));
self.1.set(value);
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::compound::peano::{P1, P2, P3};
use crate::compound::list::List;
struct ReadReceiver;
impl<P: Peano, T: TryInto<i32>> Visitor<P, T, i32> for ReadReceiver {
fn call(self, v: T) -> i32 {
unsafe {
v.try_into().unwrap_unchecked()
}
}
}
struct AddIndexPlus5Receiver;
impl<P: Peano, T: TryInto<i32>> Visitor<P, T, i32> for AddIndexPlus5Receiver {
fn call(self, v: T) -> i32 {
unsafe {
v.try_into().unwrap_unchecked() + P::VALUE as i32 + 5
}
}
}
struct Add4Receiver;
impl<P: Peano, T: TryInto<i32> + TryFrom<i32> + Copy> Visitor<P, &mut T, ()> for Add4Receiver {
fn call(self, v: &mut T) -> () {
unsafe {
*v = ((*v).try_into().unwrap_unchecked() + 4).try_into().unwrap_unchecked();
}
}
}
#[test]
fn dispatch() {
let mut e: Enum<(Discr<P3>,), List<(u32, i32, u8)>> = Enum::default();
assert_eq!(e.dispatch(AddIndexPlus5Receiver), 5);
e.encode_discr(Discr::new(1));
assert_eq!(e.dispatch(AddIndexPlus5Receiver), 6);
e.encode_discr(Discr::new(2));
assert_eq!(e.dispatch(AddIndexPlus5Receiver), 7);
}
#[test]
fn dispatch_mut() {
let mut e: Enum<(Discr<P3>,), List<(u32, i32, u8)>> = Enum::default();
e.dispatch_mut(Add4Receiver);
assert_eq!(e.dispatch(AddIndexPlus5Receiver), 9);
e.encode_discr(Discr::new(1));
assert_eq!(e.dispatch(AddIndexPlus5Receiver), 6);
}
#[test]
fn set() {
let mut e: Enum<(Discr<P3>,), List<(u32, i32, u8)>> = Enum::default();
e.set::<P0>(2u32);
assert_eq!(e.dispatch(ReadReceiver), 2);
e.set::<P1>(3i32);
assert_eq!(e.dispatch(ReadReceiver), 3);
}
#[derive(Copy, Clone, Default, PartialEq)]
struct BoxedF32(f32);
impl From<BoxedF32> for i32 {
fn from(v: BoxedF32) -> i32 {
v.0 as i32
}
}
impl From<i32> for BoxedF32 {
fn from(v: i32) -> Self {
Self(v as f32)
}
}
impl<P: Peano> DiscriminantCodable<P> for BoxedF32 {
fn decode_discr(&self) -> Discr<P> {
match self.0 {
v if v < 0f32 => Discr::new((-v) as u32),
_non_negative => Discr::new(0),
}
}
fn encode_discr(d: Discr<P>) -> Self {
Self(-(d.value() as f32))
}
}
#[test]
fn internal_discr() {
type E = Enum<(), List<(BoxedF32, i32, u8)>>;
assert_eq!(<E as EnumRequirements>::NumVariants::VALUE, 3);
let mut e: E = Enum::default();
assert_eq!(e.dispatch(ReadReceiver), 0);
e.set::<P0>(BoxedF32(16f32));
assert_eq!(e.dispatch(ReadReceiver), 16);
e.set::<P1>(5);
assert_eq!(e.dispatch(ReadReceiver), 5);
e.set::<P2>(8);
assert_eq!(e.dispatch(ReadReceiver), 8);
e.set::<P0>(BoxedF32(0f32));
assert_eq!(e.dispatch(ReadReceiver), 0);
}
#[test]
fn new() {
type E = Enum<(Discr<P2>,), List<(u32, i32)>>;
assert_eq!(<E as EnumRequirements>::NumVariants::VALUE, 2);
let e: E = Enum::new((5u32, 4i32));
assert_eq!(e.dispatch(ReadReceiver), 5);
let e = Enum::internally_discriminated((BoxedF32(4f32), -1i32));
assert_eq!(e.dispatch(ReadReceiver), 4);
}
}

//! list implementation where indexing is done non-recursively.
//! this puts a hard limit on how long a list can be and still be indexed (set by the largest
//! arity the macros below are instantiated for), but works around a limitation in rust-gpu's
//! spirv codegen which otherwise makes lists containing ZSTs break the compiler.
//! this ZST bug should be fixed in later rust-gpu revisions. see: https://github.com/EmbarkStudios/rust-gpu/commit/03f89e8ba6f236218b3c5f9b18fe03c25a4d6a5c
use crate::compound::list::{Indexable, Meta};
use crate::compound::peano::{P0, Peano, PNext};
#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Copy, Clone, Default, PartialEq)]
pub struct Node<H, T: ?Sized> {
head: H,
tail: T,
}
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Copy, Clone, Default, PartialEq)]
pub struct Null;
impl<H, T> Node<H, T> {
pub(crate) fn new(head: H, tail: T) -> Self {
Self { head, tail }
}
pub fn get<P: Peano>(&self) -> <Self as Indexable<P>>::Element
where
Self: Indexable<P>,
<Self as Indexable<P>>::Element: Copy,
{
Indexable::<P>::get(self)
}
}
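Because every list length is its own concrete nested type, "index i" is just i `tail` projections followed by `head` — exactly the access pattern the `member_index!` macro further down spells out. A throwaway copy of the shapes above to make that concrete:

```rust
// mirror of Node/Null, for illustration only
struct Node<H, T> { head: H, tail: T }
struct Null;

fn heads(list: &Node<u32, Node<&'static str, Null>>) -> (u32, &'static str) {
    // "index 0" is `.head`; "index 1" is `.tail.head`; index i adds i `.tail`s
    (list.head, list.tail.head)
}
```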
pub trait IntoList {
type List;
fn into_list(self) -> Self::List;
}
impl IntoList for () {
type List = Null;
fn into_list(self) -> Self::List {
Null
}
}
/// expands to the type name for a list with the provided types
/// ```ignore
/// list_for!(E0, E1, E2, T) => Node<E0, Node<E1, Node<E2, T>>>
/// ```
macro_rules! list_for {
($head:ident) => ($head);
($head:ident, $($rest:ident),+) => (Node<$head, list_for!($($rest),+)>);
}
/// given N idents, return P`N-1` (the zero-based index of the last ident).
/// ```ignore
/// peano_for!(P0 a, b, c) => P2
/// ```
macro_rules! peano_for {
(P0 $head:ident) => (P0);
(P0 $head:ident, $($rest:ident),+) => (PNext<peano_for!(P0 $($rest),+)>);
}
/// expands to the last item in the sequence
/// ```ignore
/// last!(a, b, c, d) => d
/// ```
macro_rules! last {
($last:ident) => ($last);
($first:ident, $($rest:ident),+) => (last!($($rest),+));
}
/// transforms a list of idents into a `self.tail.[...].tail.head` pattern
/// of the same length.
/// ```ignore
/// member_index!(self tail head a, b, c, d, e) => self.tail.tail.tail.head
/// ```
macro_rules! member_index {
// entry point: process `self`
($self:ident tail head $first:ident, $($rest:ident),+) => (
member_index!(@partial tail head [$self] $($rest),+)
);
// recursion: replace `$next` with "tail" and repeat
(@partial tail head [$($converted:ident),+] $next:ident, $($rest:ident),+) => (
member_index!(@partial tail head [$($converted),+, tail] $($rest),+)
);
// process the head: replace `$last` with "head" and then trigger the concat
(@partial tail head [$($converted:ident),+] $last:ident) => (
member_index!(@partial [$($converted),+, head])
);
// base case: all items have been replaced with "tail" or "head": concat them
(@partial [$($converted:ident),+]) => ($($converted).+);
}
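`member_index!` is an instance of the push-down-accumulator macro pattern: each step moves one placeholder ident into the bracketed accumulator as a `tail` token, and the base case swaps the final ident for `head` and joins everything with `.`. A compilable miniature of the same pattern (names are illustrative, not this crate's):

```rust
struct Inner { head: u32 }
struct Outer { tail: Inner }

macro_rules! chain {
    // entry: seed the accumulator with the root expression
    ($root:ident, $($rest:ident),+) => (chain!(@acc [$root] $($rest),+));
    // step: a non-final placeholder ident becomes a `tail` segment
    (@acc [$($seg:ident),+] $_next:ident, $($rest:ident),+) =>
        (chain!(@acc [$($seg),+, tail] $($rest),+));
    // base: the final placeholder ident becomes `head`
    (@acc [$($seg:ident),+] $_last:ident) => ($($seg).+.head);
}
```

For example `chain!(o, x, y)` expands to `o.tail.head`; the idents `x` and `y` only drive the recursion depth and never appear in the output.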
/// implements the Indexable trait for the last element provided of any prefix list.
/// ```ignore
/// impl_indexable!(E0, E1, E2)
/// => impl<E0, E1, E2, T> Indexable<P2> for Node<E0, Node<E1, Node<E2, T>>> { ... }
/// ```
macro_rules! impl_indexable {
($($elems:ident),+) => (
impl<$($elems),+, T> Indexable<peano_for!(P0 $($elems),+)> for list_for!($($elems),+, T) {
type Element = last!($($elems),+);
fn get(&self) -> Self::Element where Self::Element: Copy {
member_index!(self tail head $($elems),+, H)
}
fn get_ref(&self) -> &Self::Element {
&member_index!(self tail head $($elems),+, H)
}
fn get_mut(&mut self) -> &mut Self::Element {
&mut member_index!(self tail head $($elems),+, H)
}
fn set(&mut self, v: Self::Element) {
member_index!(self tail head $($elems),+, H) = v;
}
}
);
}
/// implements the IntoList trait for the tuple of the provided elements.
/// ```ignore
/// impl_into_list!(E0, E1, E2)
/// => impl<E0, E1, E2> IntoList for (E0, E1, E2) { ... }
/// ```
macro_rules! impl_into_list {
// syntax irregularities around the 1-tuple mean we need to special-case it.
($only:ident) => (
impl<$only> IntoList for ($only,) {
type List = list_for!($only, Null);
fn into_list(self) -> Self::List {
let (only,) = self;
Node::new(only, ().into_list())
}
}
);
($first:ident, $($next:ident),+) => (
impl<$first, $($next),+> IntoList for ($first, $($next),+) {
type List = list_for!($first, $($next),+, Null);
fn into_list(self) -> Self::List {
#[allow(non_snake_case)]
let ( $first, $($next),+ ) = self;
Node::new($first, ( $($next),+, ).into_list())
}
}
);
}
macro_rules! impl_list_traits {
($($elems:ident),+) => (
impl_indexable!($($elems),+);
impl_into_list!($($elems),+);
)
}
impl_list_traits!(E0);
impl_list_traits!(E0, E1);
impl_list_traits!(E0, E1, E2);
impl_list_traits!(E0, E1, E2, E3);
impl_list_traits!(E0, E1, E2, E3, E4);
impl_list_traits!(E0, E1, E2, E3, E4, E5);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90, E91);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90, E91, E92);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90, E91, E92, E93);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90, E91, E92, E93, E94);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90, E91, E92, E93, E94, E95);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90, E91, E92, E93, E94, E95, E96);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90, E91, E92, E93, E94, E95, E96, E97);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90, E91, E92, E93, E94, E95, E96, E97, E98);
impl_list_traits!(E0, E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E90, E91, E92, E93, E94, E95, E96, E97, E98, E99);
pub type Prepended<E, Tail> = Node<E, Tail>;
pub trait Prependable {
fn prepend<E>(self, e: E) -> Node<E, Self>;
}
impl Prependable for Null {
fn prepend<E>(self, e: E) -> Node<E, Self> {
Node::new(e, self)
}
}
impl<H, T> Prependable for Node<H, T> {
fn prepend<E>(self, e: E) -> Node<E, Self> {
Node::new(e, self)
}
}
pub type Appended<Head, Next> = <Head as Appendable<Next>>::Result;
pub trait Appendable<E> {
// XXX can't move the E parameter inside without Generic Associated Types
type Result;
fn append(self, e: E) -> Self::Result;
}
impl<E> Appendable<E> for Null {
type Result = Node<E, Null>;
fn append(self, e: E) -> Self::Result {
Node::new(e, Null)
}
}
impl<H, T, E> Appendable<E> for Node<H, T>
where T: Appendable<E>
{
type Result = Node<H, T::Result>;
fn append(self, e: E) -> Self::Result {
Node::new(self.head, self.tail.append(e))
}
}
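The `Appendable` recursion above threads the appended element's type through an associated `Result` type: the base case builds a one-element list, and the recursive case keeps its head while the tail does the work. A minimal standalone sketch of the same pattern, using local `Null`/`Node` stand-ins (for illustration only, not the crate's actual types):

```rust
// local stand-ins for the crate's Null/Node, for illustration only
struct Null;
struct Node<H, T> {
    head: H,
    tail: T,
}

trait Appendable<E> {
    type Result;
    fn append(self, e: E) -> Self::Result;
}

// base case: appending to the empty list yields a one-element list
impl<E> Appendable<E> for Null {
    type Result = Node<E, Null>;
    fn append(self, e: E) -> Self::Result {
        Node { head: e, tail: Null }
    }
}

// recursive case: keep the head, append into the tail; the element type E
// threads through the nested `Result` types until it reaches the end
impl<H, T: Appendable<E>, E> Appendable<E> for Node<H, T> {
    type Result = Node<H, T::Result>;
    fn append(self, e: E) -> Self::Result {
        Node { head: self.head, tail: self.tail.append(e) }
    }
}

fn main() {
    // heterogeneous: a u32 followed by a &str
    let l = Null.append(5u32).append("x");
    assert_eq!(l.head, 5u32);
    assert_eq!(l.tail.head, "x");
    println!("ok");
}
```

Because each `append` changes the list's type, the result type is fully known at compile time; nothing is boxed or dynamically dispatched.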
impl Meta for Null {
type Length = P0;
}
impl<H, T: Meta> Meta for Node<H, T> {
type Length = PNext<T::Length>;
}
pub trait SplitHead {
type Head;
type Tail;
fn split(self) -> (Self::Head, Self::Tail);
fn split_ref<'a>(&'a self) -> (&'a Self::Head, &'a Self::Tail);
}
impl<H, T> SplitHead for Node<H, T> {
type Head = H;
type Tail = T;
fn split(self) -> (Self::Head, Self::Tail) {
(self.head, self.tail)
}
fn split_ref<'a>(&'a self) -> (&'a Self::Head, &'a Self::Tail) {
(&self.head, &self.tail)
}
}
/// these are exported for the convenience of potential consumers: not needed internally
pub(crate) mod exports {
#![allow(dead_code)]
use super::{IntoList, Node, Null};
pub type List<Args> = <Args as IntoList>::List;
pub type List1<E0> = Node<E0, Null>;
pub type List2<E0, E1> = Node<E0, List1<E1>>;
pub type List3<E0, E1, E2> = Node<E0, List2<E1, E2>>;
pub type List4<E0, E1, E2, E3> = Node<E0, List3<E1, E2, E3>>;
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn append() {
let l0 = ().into_list();
let l1 = l0.append(5u32);
assert!(l1 == (5u32,).into_list());
let l2 = l1.append(());
assert!(l2 == (5u32, ()).into_list());
let l3 = l2.append(4f32);
assert!(l3 == (5u32, (), 4f32).into_list());
}
}

@@ -0,0 +1,130 @@
use crate::compound::peano::{P0, Peano, PeanoNonZero};
#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Copy, Clone, Default, PartialEq)]
pub struct Middle<H, T> {
head: H,
tail: T,
}
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Copy, Clone, Default, PartialEq)]
pub struct Terminal<H> {
head: H,
    // XXX: padding so this is never zero-sized in spirv; without it we can't hand out a `&self` reference
_pad: u32,
}
impl<H, T> Middle<H, T> {
fn new(head: H, tail: T) -> Self {
Middle { head, tail }
}
}
impl<H> Terminal<H> {
fn new(head: H) -> Self {
Terminal { head, _pad: Default::default() }
}
}
// Self is a Superlist of L
pub trait Superlist<Distance: Peano> {
type Of: ListOps;
fn as_sublist(&self) -> &Self::Of;
}
impl<L: ListOps> Superlist<P0> for L {
type Of = L;
fn as_sublist(&self) -> &Self::Of {
self
}
}
// if our tail T0 is a Superlist<P-1> of T1,
// then we are a Superlist<P> of T1.
impl<H0, T0: ListOps, T1: ListOps, P: PeanoNonZero> Superlist<P> for Middle<H0, T0>
where
T0: Superlist<P::Prev, Of=T1>
{
type Of = T1;
fn as_sublist(&self) -> &T1 {
self.tail.as_sublist()
}
}
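The two `Superlist` impls above resolve lookup by recursion on the type-level distance: distance zero means "this list", and distance `P+1` means "hop into the tail, then look up at distance `P`". A standalone sketch of that recursion under hypothetical local names (mirroring the shape of `Middle`/`Terminal`, but not the crate's types):

```rust
// local stand-ins, for illustration only
struct Terminal<H>(H);
struct Middle<H, T>(H, T);

// minimal type-level naturals
struct P0;
struct PNext<P>(P);

trait Sublist<P> {
    type Of;
    fn at(&self) -> &Self::Of;
}

// distance 0: every list is a sublist of itself
impl<L> Sublist<P0> for L {
    type Of = L;
    fn at(&self) -> &L {
        self
    }
}

// distance P+1: hop into the tail, then look up at distance P,
// peeling one `PNext` per hop
impl<H, T: Sublist<P>, P> Sublist<PNext<P>> for Middle<H, T> {
    type Of = T::Of;
    fn at(&self) -> &Self::Of {
        self.1.at()
    }
}

fn main() {
    let l = Middle(1u32, Middle("two", Terminal(3.0f64)));
    // one hop: the sublist starting at the second element
    assert_eq!(Sublist::<PNext<P0>>::at(&l).0, "two");
    // two hops: the terminal node
    assert_eq!(Sublist::<PNext<PNext<P0>>>::at(&l).0, 3.0f64);
    println!("ok");
}
```

The two impls do not overlap because one is keyed on `P0` and the other on `PNext<P>`, which is what lets the blanket distance-zero impl coexist with the recursive one.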
pub trait ListOps {
type Element;
fn read(&self) -> Self::Element where Self::Element: Copy;
fn index<P: Peano>(&self) -> &<Self as Superlist<P>>::Of
where Self: Superlist<P>
{
self.as_sublist()
}
fn get<P: Peano>(&self) -> <<Self as Superlist<P>>::Of as ListOps>::Element
where
Self: Superlist<P>,
<<Self as Superlist<P>>::Of as ListOps>::Element: Copy
{
self.index::<P>().read()
}
}
impl<H> ListOps for Terminal<H> {
type Element = H;
fn read(&self) -> Self::Element where Self::Element: Copy {
self.head
}
}
impl<H, T> ListOps for Middle<H, T> {
type Element = H;
fn read(&self) -> Self::Element where Self::Element: Copy {
self.head
}
}
pub trait IntoList {
type List;
fn into_list(self) -> Self::List;
}
impl<E0> IntoList for (E0,) {
type List = Terminal<E0>;
fn into_list(self) -> Self::List {
Terminal::new(self.0)
}
}
impl<E0, E1> IntoList for (E0, E1) {
type List = Middle<E0, <(E1,) as IntoList>::List>;
fn into_list(self) -> Self::List {
Middle::new(self.0, (self.1,).into_list())
}
}
impl<E0, E1, E2> IntoList for (E0, E1, E2) {
type List = Middle<E0, <(E1, E2) as IntoList>::List>;
fn into_list(self) -> Self::List {
Middle::new(self.0, (self.1, self.2).into_list())
}
}
pub type List<Args> = <Args as IntoList>::List;
#[cfg(test)]
mod test {
use super::*;
use crate::compound::peano::{P0, P1, P2};
#[test]
fn list_index() {
let l = (1u32, 3f32, 4u32).into_list();
assert_eq!(l.read(), 1u32);
assert_eq!(l.index::<P0>().read(), 1u32);
assert_eq!(l.index::<P1>().read(), 3f32);
assert_eq!(l.index::<P2>().read(), 4u32);
}
}

@@ -0,0 +1,724 @@
use crate::compound::peano::{Peano, P0};
mod flat;
// mod linked;
// mod tuple_consumer;
// pub use tuple_consumer::*;
// pub use linked::*;
pub use flat::{IntoList, Appendable, Appended, Prependable, Prepended};
pub use flat::exports::*;
use flat::{Node, SplitHead};
pub type Empty = flat::Null;
/// something which can be indexed at `P`.
/// a List of length N is expected to implement `Indexable<P>` for all `P < N`.
pub trait Indexable<P: Peano> {
type Element;
fn get(&self) -> Self::Element where Self::Element: Copy;
fn get_ref(&self) -> &Self::Element;
fn get_mut(&mut self) -> &mut Self::Element;
fn set(&mut self, v: Self::Element);
}
pub trait IndexableExplicit {
fn get<P: Peano>(&self) -> <Self as Indexable<P>>::Element
where
Self: Indexable<P>,
Self::Element: Copy,
{
Indexable::get(self)
}
fn get_ref<P: Peano>(&self) -> &<Self as Indexable<P>>::Element
where
Self: Indexable<P>,
{
Indexable::get_ref(self)
}
fn get_mut<P: Peano>(&mut self) -> &mut <Self as Indexable<P>>::Element
where
Self: Indexable<P>,
{
Indexable::get_mut(self)
}
fn set<P: Peano>(&mut self, v: Self::Element)
where
Self: Indexable<P>,
{
Indexable::set(self, v)
}
fn get_first(&self) -> <Self as Indexable<P0>>::Element
where
Self: Indexable<P0>,
Self::Element: Copy,
{
Indexable::get(self)
}
fn get_first_ref(&self) -> &<Self as Indexable<P0>>::Element
where
Self: Indexable<P0>,
{
Indexable::get_ref(self)
}
fn get_first_mut(&mut self) -> &mut <Self as Indexable<P0>>::Element
where
Self: Indexable<P0>,
{
Indexable::get_mut(self)
}
fn set_first(&mut self, v: Self::Element)
where
Self: Indexable<P0>,
{
Indexable::set(self, v)
}
}
impl<L> IndexableExplicit for L {}
/// convenience to lookup the type of the element at index `P` of list `L`.
pub type ElementAt<P, L> = <L as Indexable<P>>::Element;
/// implemented by any List (including `Null`, the empty list)
pub trait Meta {
type Length: Peano;
fn len(&self) -> u32 {
Self::Length::VALUE
}
}
pub trait ListConsumer<L> {
type Output;
fn consume(self, a: L) -> Self::Output;
}
/// implement on your own type to process one list value and return whatever state is necessary to
/// process the subsequent value (and by extension all values)
pub trait FoldOp<State, V> {
type Output;
fn feed(&mut self, prev: State, next: V) -> Self::Output;
}
pub struct FoldImpl<Op, State>(Op, State);
//////// fold by-value
impl<Op, State> ListConsumer<Empty> for FoldImpl<Op, State> {
type Output = State;
fn consume(self, _l: Empty) -> Self::Output {
self.1
}
}
impl<H, T, Op, State> ListConsumer<Node<H, T>> for FoldImpl<Op, State>
where
Op: FoldOp<State, H>,
FoldImpl<Op, Op::Output>: ListConsumer<T>,
{
type Output = <FoldImpl<Op, Op::Output> as ListConsumer<T>>::Output;
fn consume(self, l: Node<H, T>) -> Self::Output {
let FoldImpl(mut op, state) = self;
let (head, tail) = l.split();
let next_state = op.feed(state, head);
FoldImpl(op, next_state).consume(tail)
}
}
//////// fold by-ref
impl<Op, State> ListConsumer<&Empty> for FoldImpl<Op, State> {
type Output = State;
fn consume(self, _l: &Empty) -> Self::Output {
self.1
}
}
impl<'a, H, T, Op, State> ListConsumer<&'a Node<H, T>> for FoldImpl<Op, State>
where
Op: FoldOp<State, &'a H>,
FoldImpl<Op, Op::Output>: ListConsumer<&'a T>,
{
type Output = <FoldImpl<Op, Op::Output> as ListConsumer<&'a T>>::Output;
fn consume(self, l: &'a Node<H, T>) -> Self::Output {
let FoldImpl(mut op, state) = self;
let (head, tail) = l.split_ref();
let next_state = op.feed(state, head);
FoldImpl(op, next_state).consume(tail)
}
}
pub trait Fold<Op, Init> {
type Output;
fn fold(self, op: Op, init: Init) -> Self::Output;
}
impl<L, Op, Init> Fold<Op, Init> for L
where
FoldImpl<Op, Init>: ListConsumer<L>
{
type Output = <FoldImpl<Op, Init> as ListConsumer<L>>::Output;
fn fold(self, op: Op, init: Init) -> Self::Output {
FoldImpl(op, init).consume(self)
}
}
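The key property of the fold machinery above is that the state *type* may change at every step, which is what lets a single fold traverse a heterogeneous list. A standalone sketch with local types (assumed names, not the crate's), showing one fold op that works for any element type:

```rust
// local stand-ins, for illustration only
struct Null;
struct Node<H, T> {
    head: H,
    tail: T,
}

// maps (state, element) -> new state; the state type may differ per step
trait FoldOp<State, V> {
    type Output;
    fn feed(&mut self, prev: State, next: V) -> Self::Output;
}

trait Fold<Op, Init> {
    type Output;
    fn fold(self, op: Op, init: Init) -> Self::Output;
}

// empty list: the fold is just the initial state
impl<Op, Init> Fold<Op, Init> for Null {
    type Output = Init;
    fn fold(self, _op: Op, init: Init) -> Init {
        init
    }
}

// non-empty: feed the head, then fold the tail with the op's output type
impl<H, T, Op, Init> Fold<Op, Init> for Node<H, T>
where
    Op: FoldOp<Init, H>,
    T: Fold<Op, Op::Output>,
{
    type Output = <T as Fold<Op, Op::Output>>::Output;
    fn fold(self, mut op: Op, init: Init) -> Self::Output {
        let next = op.feed(init, self.head);
        self.tail.fold(op, next)
    }
}

// count elements regardless of their types: state is always u32
struct CountOp;
impl<V> FoldOp<u32, V> for CountOp {
    type Output = u32;
    fn feed(&mut self, prev: u32, _next: V) -> u32 {
        prev + 1
    }
}

fn main() {
    let list = Node { head: 1u8, tail: Node { head: "two", tail: Node { head: 3.0f64, tail: Null } } };
    assert_eq!(list.fold(CountOp, 0u32), 3);
    println!("ok");
}
```

This is the same shape the crate's `Visit`, `Reverse`, `Sum`, and `Map` traits build on: each is a `FoldOp` plus an initial state.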
pub trait Visitor<E> {
fn visit(&mut self, v: E);
}
pub struct VisitOp<V>(V);
impl<V, Next> FoldOp<(), Next> for VisitOp<V>
where
V: Visitor<Next>
{
type Output = ();
fn feed(&mut self, _prev: (), next: Next) {
self.0.visit(next)
}
}
/// invokes the Visitor `V` on every element of the list.
pub trait Visit<V> {
fn visit(self, v: V);
}
impl<V, L> Visit<V> for L
where
L: Fold<VisitOp<V>, (), Output=()>
{
fn visit(self, v: V) {
self.fold(VisitOp(v), ())
}
}
pub struct ReverseOp;
impl<Prev, Next> FoldOp<Prev, Next> for ReverseOp {
type Output = Node<Next, Prev>;
fn feed(&mut self, prev: Prev, next: Next) -> Self::Output {
Node::new(next, prev)
}
}
pub trait Reverse {
type Output;
fn reverse(self) -> Self::Output;
}
impl<L> Reverse for L
where
L: Fold<ReverseOp, Empty>
{
type Output = L::Output;
fn reverse(self) -> Self::Output {
self.fold(ReverseOp, Empty::default())
}
}
pub struct SumOp;
impl<Prev, Next> FoldOp<Prev, Next> for SumOp
where
Prev: core::ops::Add<Next>,
{
type Output = Prev::Output;
fn feed(&mut self, prev: Prev, next: Next) -> Self::Output {
prev + next
}
}
pub trait Sum<Init> {
type Output;
fn sum(self, init: Init) -> Self::Output;
}
impl<Init, L> Sum<Init> for L
where
L: Fold<SumOp, Init>
{
type Output = L::Output;
fn sum(self, init: Init) -> Self::Output {
self.fold(SumOp, init)
}
}
#[cfg(feature = "std")]
mod into_vec {
use super::*;
pub struct IntoVecOp;
impl<Next> FoldOp<Vec<Next>, Next> for IntoVecOp {
type Output = Vec<Next>;
fn feed(&mut self, mut prev: Vec<Next>, next: Next) -> Self::Output {
prev.push(next);
prev
}
}
pub trait IntoVec<T> {
fn into_vec(self) -> Vec<T>;
}
impl<T, L> IntoVec<T> for L
where
L: Fold<IntoVecOp, Vec<T>, Output=Vec<T>>
{
fn into_vec(self) -> Vec<T> {
self.fold(IntoVecOp, Vec::new())
}
}
}
#[cfg(feature = "std")]
pub use into_vec::{IntoVec, IntoVecOp};
pub trait MapVisitor<V> {
type Output;
fn map(&self, elem: V) -> Self::Output;
}
pub struct MapOp<F>(F);
impl<Prev, Next, F> FoldOp<Prev, Next> for MapOp<F>
where
F: MapVisitor<Next>,
Prev: Appendable<F::Output>,
{
type Output = Appended<Prev, F::Output>;
fn feed(&mut self, prev: Prev, next: Next) -> Self::Output {
prev.append(self.0.map(next))
}
}
pub trait Map<Visitor> {
type Output;
fn map(self, op: Visitor) -> Self::Output;
}
impl<L, Visitor> Map<Visitor> for L
where
L: Fold<MapOp<Visitor>, Empty>
{
type Output = L::Output;
fn map(self, visitor: Visitor) -> Self::Output {
self.fold(MapOp(visitor), Empty::default())
}
}
pub struct IdentityMapVisitor;
impl<V> MapVisitor<V> for IdentityMapVisitor {
type Output = V;
fn map(&self, elem: V) -> Self::Output {
elem
}
}
pub trait Extend<L> {
type Output;
fn extend(self, l: L) -> Self::Output;
}
impl<L0, L1> Extend<L1> for L0
where
L1: Fold<MapOp<IdentityMapVisitor>, L0>
{
type Output = L1::Output;
fn extend(self, l: L1) -> Self::Output {
l.fold(MapOp(IdentityMapVisitor), self)
}
}
pub struct FlattenOp;
impl<Prev, Next> FoldOp<Prev, Next> for FlattenOp
where
Prev: Extend<Next>,
{
type Output = Prev::Output;
fn feed(&mut self, prev: Prev, next: Next) -> Self::Output {
prev.extend(next)
}
}
pub trait Flatten {
type Output;
fn flatten(self) -> Self::Output;
}
impl<L> Flatten for L
where
L: Fold<FlattenOp, Empty>
{
type Output = L::Output;
fn flatten(self) -> Self::Output {
self.fold(FlattenOp, Empty::default())
}
}
#[derive(Copy, Clone, Default, PartialEq)]
pub struct Tagged<P: Peano, V> {
inner: V,
_p: P::Unit,
}
impl<P: Peano, V> Tagged<P, V> {
pub fn new(inner: V) -> Self {
Self { inner, _p: P::Unit::default() }
}
pub fn into_inner(self) -> V {
self.inner
}
}
impl<P: Peano, V> core::ops::Deref for Tagged<P, V> {
type Target = V;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
impl<P: Peano, V> core::ops::DerefMut for Tagged<P, V> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.inner
}
}
pub struct EnumerateOp;
impl<L, Next> FoldOp<L, Next> for EnumerateOp
where
L: Meta,
L: Appendable<Tagged<L::Length, Next>>,
{
type Output = Appended<L, Tagged<L::Length, Next>>;
fn feed(&mut self, prev: L, next: Next) -> Self::Output {
prev.append(Tagged::new(next))
}
}
pub trait Enumerate {
type Output;
fn enumerate(self) -> Self::Output;
}
impl<L> Enumerate for L
where
L: Fold<EnumerateOp, Empty>
{
type Output = L::Output;
fn enumerate(self) -> Self::Output {
self.fold(EnumerateOp, Empty::default())
}
}
pub struct MapTagToValueOp<T>(T /* unused */);
impl<P: Peano, V, T: From<u32>> MapVisitor<Tagged<P, V>> for MapTagToValueOp<T> {
type Output = (T, V);
fn map(&self, elem: Tagged<P, V>) -> Self::Output {
(P::VALUE.into(), elem.into_inner())
}
}
pub trait EnumerateU32 {
type Output;
fn enumerate_u32(self) -> Self::Output;
}
impl<L> EnumerateU32 for L
where
L: Enumerate,
L::Output: Map<MapTagToValueOp<u32>>,
{
type Output = <L::Output as Map<MapTagToValueOp<u32>>>::Output;
fn enumerate_u32(self) -> Self::Output {
self.enumerate().map(MapTagToValueOp(0u32 /* unused */))
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::compound::peano::{P0, P1, P2};
struct SumVal;
impl FoldOp<i32, i32> for SumVal {
type Output = i32;
fn feed(&mut self, prev: i32, next: i32) -> Self::Output {
prev + next
}
}
#[test]
fn fold_prim() {
let list = (3, 4, 5i32).into_list();
assert_eq!(list.fold(SumVal, 2i32), 2+3+4+5);
}
impl FoldOp<i32, f32> for SumVal {
type Output = f32;
fn feed(&mut self, prev: i32, next: f32) -> Self::Output {
prev as f32 + next
}
}
impl FoldOp<f32, f32> for SumVal {
type Output = f32;
fn feed(&mut self, prev: f32, next: f32) -> Self::Output {
prev + next
}
}
impl FoldOp<f32, i32> for SumVal {
type Output = f32;
fn feed(&mut self, prev: f32, next: i32) -> Self::Output {
prev + next as f32
}
}
#[test]
fn fold_mixed() {
// we fold:
// 2i32 + 3i32
// 5i32 + 4i32
// 9i32 + 5.5f32
// 14.5f32 + 6.5f32
// 21f32 + 7i32
let list = (3i32, 4i32, 5.5f32, 6.5f32, 7i32).into_list();
assert_eq!(list.fold(SumVal, 2i32), 28f32);
}
#[derive(PartialEq)]
struct NotCopy(i32);
struct SumRef;
impl FoldOp<i32, &NotCopy> for SumRef {
type Output = i32;
fn feed(&mut self, prev: i32, next: &NotCopy) -> Self::Output {
prev + next.0
}
}
impl FoldOp<i32, &i32> for SumRef {
type Output = i32;
fn feed(&mut self, prev: i32, next: &i32) -> Self::Output {
prev + *next
}
}
#[test]
fn fold_ref() {
let list = &(3i32, NotCopy(4i32), 5i32).into_list();
assert_eq!(list.fold(SumRef, 2i32), 14i32);
assert!(list == list); // just check that it wasn't consumed
}
struct NoopVisitor;
impl<V> Visitor<V> for NoopVisitor {
fn visit(&mut self, _e: V) {}
}
#[test]
fn visit_noop() {
let list = (3f32, NotCopy(4i32), 5u32).into_list();
list.visit(NoopVisitor);
let list = &(3f32, NotCopy(4i32), 5u32).into_list();
list.visit(NoopVisitor);
}
struct AccumVisitor(i32);
impl Visitor<i32> for &mut AccumVisitor {
fn visit(&mut self, e: i32) {
self.0 += e * 2;
}
}
#[test]
fn visit_mut() {
let list = (3i32, 4i32, 5i32).into_list();
let mut v = AccumVisitor(0);
list.visit(&mut v);
assert_eq!(v.0, 24);
}
#[test]
fn into_vec() {
let list = (3i32, 4i32, 5i32).into_list();
assert_eq!(list.into_vec(), vec![3i32, 4, 5]);
}
#[test]
fn into_vec_ref() {
let list = &(3i32, 4i32, 5i32).into_list();
assert_eq!(list.into_vec(), vec![&3i32, &4, &5]);
}
#[test]
fn into_vec_empty() {
assert_eq!(IntoVec::<u32>::into_vec(Empty::default()), vec![]);
}
#[test]
fn sum() {
let list = (3i32, 4i32, 5i32).into_list();
assert_eq!(list.sum(2i32), 14i32);
}
struct SumA(f32);
#[derive(Debug, PartialEq)]
struct SumB(f32);
impl core::ops::Add<i32> for SumA {
type Output = SumB;
fn add(self, other: i32) -> Self::Output {
SumB(self.0 + other as f32)
}
}
impl core::ops::Add<f32> for SumB {
type Output = SumA;
fn add(self, other: f32) -> Self::Output {
SumA(self.0 + other)
}
}
#[test]
fn sum_mixed() {
let list = (3i32, 4f32, 5i32).into_list();
assert_eq!(list.sum(SumA(2f32)), SumB(14f32));
}
#[test]
fn reverse_empty() {
assert!(Empty::default().reverse() == Empty::default());
}
#[test]
fn reverse_non_empty() {
let list = (3i32, 4f32, 5u32).into_list();
let expected = (5u32, 4f32, 3i32).into_list();
assert!(list.reverse() == expected);
}
struct Double;
impl MapVisitor<i32> for Double {
type Output = i32;
fn map(&self, v: i32) -> Self::Output {
v + v
}
}
impl MapVisitor<f32> for Double {
type Output = f32;
fn map(&self, v: f32) -> Self::Output {
v + v
}
}
#[test]
fn map_empty() {
assert!(Empty::default().map(Double) == Empty::default());
}
#[test]
fn map_mixed() {
let list = (2i32, 3f32, 4i32).into_list();
let expected = (4i32, 6f32, 8i32).into_list();
assert!(list.map(Double) == expected);
}
#[test]
fn extend_empty() {
let l0 = Empty::default();
let l1 = Empty::default();
assert!(l0.extend(l1) == Empty::default());
}
#[test]
fn extend_with_empty() {
let l0 = (2u32, 3f32).into_list();
let l1 = Empty::default();
let expected = (2u32, 3f32).into_list();
assert!(l0.extend(l1) == expected);
}
#[test]
fn extend_from_empty() {
let l0 = Empty::default();
let l1 = (2u32, 3f32).into_list();
let expected = (2u32, 3f32).into_list();
assert!(l0.extend(l1) == expected);
}
#[test]
fn extend_mixed() {
let l0 = (2u32, 3f32).into_list();
let l1 = ("hello",).into_list();
let expected = (2u32, 3f32, "hello").into_list();
assert!(l0.extend(l1) == expected);
}
#[test]
fn flatten_empty() {
assert!(Empty::default().flatten() == Empty::default());
}
#[test]
fn flatten_inner_empty1() {
let l = (Empty::default(),).into_list();
assert!(l.flatten() == Empty::default());
}
#[test]
fn flatten_inner_empty2() {
let l = (Empty::default(), Empty::default()).into_list();
assert!(l.flatten() == Empty::default());
}
#[test]
fn flatten_mixed() {
let l = (
(2u32, 3f32).into_list(),
(4u32,).into_list(),
).into_list();
let expected = (
2u32,
3f32,
4u32,
).into_list();
assert!(l.flatten() == expected);
}
#[test]
fn flatten_nested() {
let l = (
(2u32, 3f32).into_list(),
("hello", ("every", "one").into_list()).into_list(),
(4u32,).into_list(),
).into_list();
let expected = (
2u32,
3f32,
"hello",
("every", "one").into_list(),
4u32,
).into_list();
assert!(l.flatten() == expected);
}
#[test]
fn enumerate_empty() {
assert!(Empty::default().enumerate() == Empty::default());
}
#[test]
fn enumerate_one() {
let list = (2i32,).into_list();
let expected = (Tagged::<P0, _>::new(2i32),).into_list();
assert!(list.enumerate() == expected);
}
#[test]
fn enumerate_multiple() {
let list = (2i32, (), 4f32).into_list();
let expected = (
Tagged::<P0, _>::new(2i32),
Tagged::<P1, _>::new(()),
Tagged::<P2, _>::new(4f32),
).into_list();
assert!(list.enumerate() == expected);
}
#[test]
fn enumerate_u32_multiple() {
let list = (2i32, (), 4f32).into_list();
let expected = (
(0u32, 2i32),
(1u32, ()),
(2u32, 4f32)
).into_list();
assert!(list.enumerate_u32() == expected);
}
}

@@ -0,0 +1,161 @@
use crate::compound::peano::{P0, PNext};
#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};
pub trait TuplePrims: Sized {
type Head; // single element
type Tail; // variably-sized tuple
fn into_head(self) -> Self::Head;
fn into_tail(self) -> Self::Tail;
}
impl<E0> TuplePrims for (E0,) {
type Head = E0;
type Tail = ();
fn into_head(self) -> Self::Head {
self.0
}
fn into_tail(self) -> Self::Tail {
()
}
}
impl<E0, E1> TuplePrims for (E0, E1) {
type Head = E0;
type Tail = (E1,);
fn into_head(self) -> Self::Head {
self.0
}
fn into_tail(self) -> Self::Tail {
(self.1,)
}
}
impl<E0, E1, E2> TuplePrims for (E0, E1, E2) {
type Head = E0;
type Tail = (E1, E2);
fn into_head(self) -> Self::Head {
self.0
}
fn into_tail(self) -> Self::Tail {
(self.1, self.2)
}
}
impl<E0, E1, E2, E3> TuplePrims for (E0, E1, E2, E3) {
type Head = E0;
type Tail = (E1, E2, E3);
fn into_head(self) -> Self::Head {
self.0
}
fn into_tail(self) -> Self::Tail {
(self.1, self.2, self.3)
}
}
// note that this construction allows a zero-length list (Null),
// which is sort of interesting.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Copy, Clone, Default, PartialEq)]
pub struct List<Args>(Args);
impl<Args> List<Args> {
pub fn new(args: Args) -> Self {
Self(args)
}
}
pub trait ListPrims: Sized {
type Head; // single element
type Tail; // variably-sized List
fn into_head(self) -> Self::Head;
fn into_tail(self) -> Self::Tail;
}
impl<Args: TuplePrims> ListPrims for List<Args> {
type Head = Args::Head;
type Tail = List<Args::Tail>;
fn into_head(self) -> Self::Head {
self.0.into_head()
}
fn into_tail(self) -> Self::Tail {
List(self.0.into_tail())
}
}
pub trait Consumable<P> {
type Result: ListPrims;
fn consume(self) -> Self::Result;
}
impl<Args: TuplePrims> Consumable<P0> for List<Args> {
type Result = Self;
fn consume(self) -> Self::Result {
self
}
}
impl<Args, P> Consumable<PNext<P>> for List<Args>
where
Self: ListPrims,
<Self as ListPrims>::Tail: Consumable<P>,
{
type Result = <<Self as ListPrims>::Tail as Consumable<P>>::Result;
fn consume(self) -> Self::Result {
self.into_tail().consume()
}
}
impl<Args: TuplePrims> List<Args> {
pub fn consume<P>(self) -> <Self as Consumable<P>>::Result
where Self: Consumable<P>
{
Consumable::<P>::consume(self)
}
pub fn get<P>(self) -> <<Self as Consumable<P>>::Result as ListPrims>::Head
where Self: Consumable<P>
{
self.consume().into_head()
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::compound::peano::{P1, P2};
#[test]
fn get() {
let list = List((5u32, 4i32, 3f32));
assert_eq!(list.get::<P0>(), 5u32);
assert_eq!(list.get::<P1>(), 4i32);
assert_eq!(list.get::<P2>(), 3f32);
}
// #[test]
// fn get() {
// let list = (5u32, 4i32, 3f32).into_list();
// assert_eq!(list.get::<P0>(), &5u32);
// assert_eq!(list.get::<P1>(), &4i32);
// assert_eq!(list.get::<P2>(), &3f32);
// }
// #[test]
// fn set() {
// let mut list = List::<(u32, i32, f32)>::default();
// list.set::<P0>(5u32);
// assert_eq!(list.get::<P0>(), &5u32);
// assert_eq!(list.get::<P1>(), &0i32);
// assert_eq!(list.get::<P2>(), &0f32);
// list.set::<P2>(3f32);
// list.set::<P1>(4i32);
// assert_eq!(list.get::<P0>(), &5u32);
// assert_eq!(list.get::<P1>(), &4i32);
// assert_eq!(list.get::<P2>(), &3f32);
// }
}

@@ -0,0 +1,6 @@
pub mod enumerated;
pub mod list;
mod optional;
pub mod peano;
pub use optional::Optional;

@@ -0,0 +1,86 @@
#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};
/// This is a spirv-compatible option type.
/// The native Rust `Option` type produces invalid spirv due to its enum nature; this custom
/// option type generates code which will actually compile.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Copy, Clone, PartialEq)]
pub struct Optional<T> {
// XXX: not a bool, because: "entrypoint parameter cannot contain a boolean"
present: u8,
data: T,
}
impl<T> Optional<T> {
pub fn some(data: T) -> Self {
Self {
present: 1,
data,
}
}
pub fn explicit_none(data: T) -> Self {
Self {
present: 0,
data,
}
}
pub fn is_some(self) -> bool {
self.present != 0
}
pub fn unwrap(self) -> T {
debug_assert!(self.present != 0);
self.data
}
pub fn map<U: Default, F: FnOnce(T) -> U>(self, f: F) -> Optional<U> {
self.and_then(|inner| Optional::some(f(inner)))
}
pub fn and_then<U: Default, F: FnOnce(T) -> Optional<U>>(self, f: F) -> Optional<U> {
if self.present != 0 {
f(self.data)
} else {
Optional::none()
}
}
pub fn unwrap_or(self, default: T) -> T {
if self.present != 0 {
self.data
} else {
default
}
}
}
impl<T: Default> Optional<T> {
pub fn none() -> Self {
Self::explicit_none(Default::default())
}
pub fn unwrap_or_default(self) -> T {
self.unwrap_or(Default::default())
}
}
impl<T: Default> Default for Optional<T> {
fn default() -> Self {
Self::none()
}
}
impl<T0: Default, T1: Default> Optional<(T0, T1)> {
pub fn flatten((f0, f1): (Optional<T0>, Optional<T1>)) -> Self {
if f0.present != 0 && f1.present != 0 {
Optional::some((f0.data, f1.data))
} else {
Optional::none()
}
}
}
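The layout trick above is worth seeing in isolation: presence is stored out-of-band as an integer next to an always-initialized payload, so there is no enum discriminant in the type. A cut-down standalone sketch (a subset of the API above, reimplemented locally for illustration):

```rust
// flat option: no enum, so no discriminant in the layout
#[derive(Copy, Clone, PartialEq, Debug)]
struct Optional<T> {
    present: u8, // 0 = none, nonzero = some (u8 rather than bool, per the comment above)
    data: T,     // always initialized, even when "none"
}

impl<T> Optional<T> {
    fn some(data: T) -> Self {
        Self { present: 1, data }
    }
    fn unwrap_or(self, default: T) -> T {
        if self.present != 0 { self.data } else { default }
    }
}

impl<T: Default> Optional<T> {
    fn none() -> Self {
        Self { present: 0, data: T::default() }
    }
}

fn main() {
    assert_eq!(Optional::some(7u32).unwrap_or(0), 7);
    assert_eq!(Optional::<u32>::none().unwrap_or(0), 0);
    // a "none" still carries a (default) payload; only `present` differs
    assert_eq!(Optional::<u32>::none().present, 0);
    println!("ok");
}
```

The cost relative to `core::Option` is that "none" still occupies space for a `T` (hence the `T: Default` bound on `none`); the benefit is a fixed, branch-free layout.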

@@ -0,0 +1,76 @@
//! Peano numbers are type-level natural numbers, built from a zero and a successor
//! (much like Church numerals): each non-zero Peano number is defined as the unique
//! successor of a previous Peano number.
//!
//! - given some Peano number `I`, we can derive its successor as `PNext<I>`.
//! - given a Peano number `PNext<I>`, its predecessor is `I`.
//! - the base Peano number, which represents 0, is `P0`.
//! - the `Peano` trait exposes alternative spellings of these: `P::Next` and `P::PrevOrZero`
//!   (plus `P::Prev` on `PeanoNonZero`); however the type system can reason less readily
//!   about these (they're just a convenience).
//!
//! the primary use of Peano numbers is to allow types to specialize on a specific natural number
//! out of some larger set of natural numbers. e.g. one might have a `struct List<Length: Peano>`
//! to allow constructing a list of compile-time constant length.
//!
//! this whole module will hopefully be obsoleted as Rust's type-level integers become more
//! capable, but in 2022 Peano numbers enable more operations (arithmetic, specialization) than type-level integers.
#[derive(Copy, Clone, Default, PartialEq)]
pub struct PNext<P>(P);
#[derive(Copy, Clone, Default, PartialEq)]
pub struct P0;
pub type P1 = PNext<P0>;
/// these are exported for the convenience of potential consumers: not needed internally
mod exports {
#![allow(dead_code)]
use super::{P1, PNext};
pub type P2 = PNext<P1>;
pub type P3 = PNext<P2>;
pub type P4 = PNext<P3>;
pub type P5 = PNext<P4>;
pub type P6 = PNext<P5>;
pub type P7 = PNext<P6>;
pub type P8 = PNext<P7>;
pub type P9 = PNext<P8>;
pub type P10 = PNext<P9>;
pub type P11 = PNext<P10>;
pub type P12 = PNext<P11>;
pub type P13 = PNext<P12>;
pub type P14 = PNext<P13>;
pub type P15 = PNext<P14>;
}
pub use exports::*;
pub trait Peano: Copy + Clone + Default + PartialEq {
type Next: PeanoNonZero;
type PrevOrZero: Peano;
/// always set to ()
/// this exists to allow Peano numbers to be used as struct parameters without PhantomData
type Unit: Copy + Clone + Default + PartialEq;
const VALUE: u32;
}
pub trait PeanoNonZero: Peano {
type Prev: Peano;
}
impl Peano for P0 {
type Next = P1;
type PrevOrZero = P0;
type Unit = ();
const VALUE: u32 = 0;
}
impl<P: Peano> Peano for PNext<P> {
type Next = PNext<PNext<P>>;
type PrevOrZero = P;
type Unit = ();
const VALUE: u32 = 1 + P::VALUE;
}
impl<P: Peano> PeanoNonZero for PNext<P> {
type Prev = P;
}
// A: LessThan<B> is satisfied only if A is strictly less than B.
pub trait LessThan<P: Peano> { }
impl<P: Peano> LessThan<PNext<P>> for P { }
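A condensed standalone sketch of the module above (local re-definitions, for illustration): numbers are types, arithmetic happens in trait resolution, and `VALUE` bridges back to a runtime `u32` when needed.

```rust
// zero and successor, as types
struct P0;
struct PNext<P>(P);

trait Peano {
    const VALUE: u32;
}
impl Peano for P0 {
    const VALUE: u32 = 0;
}
impl<P: Peano> Peano for PNext<P> {
    const VALUE: u32 = 1 + P::VALUE; // evaluated at compile time
}

type P1 = PNext<P0>;
type P2 = PNext<P1>;
type P3 = PNext<P2>;

// compile-time bound, as in the module above; note that with only this one
// impl, `A: LessThan<B>` is derivable only when B is A's immediate successor,
// which is all this example needs
trait LessThan<P> {}
impl<P> LessThan<PNext<P>> for P {}

fn takes_index<I: Peano + LessThan<P3>>() -> u32 {
    I::VALUE
}

fn main() {
    assert_eq!(P3::VALUE, 3);
    assert_eq!(takes_index::<P2>(), 2);
    // takes_index::<P3>() would fail to compile: P3 is not LessThan<P3>
    println!("ok");
}
```

This is the mechanism the list modules lean on: an out-of-range index is a missing trait impl, so it fails at compile time rather than at runtime.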

@@ -0,0 +1,369 @@
use core::convert::{AsMut, AsRef};
#[cfg(feature = "iter")]
use core::iter::Zip;
use core::ops::{Index, IndexMut};
use crate::vec::Vec3u;
/// use this to wrap a flat region of memory into something which can be indexed by coordinates in
/// 3d space.
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Clone, Default, PartialEq)]
pub struct DimSlice<T> {
dim: Vec3u,
items: T,
}
impl<T> DimSlice<T> {
pub fn new(dim: Vec3u, items: T) -> Self {
Self { dim, items }
}
pub fn dim(&self) -> Vec3u {
self.dim
}
pub fn into_inner(self) -> T {
self.items
}
pub fn indices(&self) -> DimIter {
DimIter::new(self.dim)
}
/// re-borrow the slice with a different lifetime.
pub fn as_ref<R: ?Sized>(&self) -> DimSlice<&R>
where T: AsRef<R>
{
DimSlice::new(self.dim, self.items.as_ref())
}
    /// re-borrow the slice mutably with a different lifetime.
pub fn as_mut<R: ?Sized>(&mut self) -> DimSlice<&mut R>
where T: AsMut<R>
{
DimSlice::new(self.dim, self.items.as_mut())
}
}
#[cfg(feature = "iter")]
impl<T: IntoIterator> DimSlice<T> {
pub fn enumerated(self) -> Zip<DimIter, T::IntoIter> {
self.indices().zip(self.into_iter())
}
}
fn index(loc: Vec3u, dim: Vec3u) -> usize {
((loc.z()*dim.y() + loc.y())*dim.x() + loc.x()) as usize
}
impl<'a, T: Index<usize> + ?Sized> Index<Vec3u> for DimSlice<&'a T> {
type Output = T::Output;
fn index(&self, idx: Vec3u) -> &Self::Output {
let idx = index(idx, self.dim);
&self.items[idx]
}
}
impl<'a, T: Index<usize> + ?Sized> Index<Vec3u> for DimSlice<&'a mut T> {
type Output = T::Output;
fn index(&self, idx: Vec3u) -> &Self::Output {
let idx = index(idx, self.dim);
&self.items[idx]
}
}
impl<'a, T: IndexMut<usize> + ?Sized> IndexMut<Vec3u> for DimSlice<&'a mut T> {
fn index_mut(&mut self, idx: Vec3u) -> &mut Self::Output {
let idx = index(idx, self.dim);
&mut self.items[idx]
}
}
#[cfg(feature = "std")]
impl<T> Index<Vec3u> for DimSlice<Vec<T>> {
type Output = T;
fn index(&self, idx: Vec3u) -> &Self::Output {
let idx = index(idx, self.dim);
&self.items[idx]
}
}
#[cfg(feature = "std")]
impl<T> IndexMut<Vec3u> for DimSlice<Vec<T>> {
fn index_mut(&mut self, idx: Vec3u) -> &mut Self::Output {
let idx = index(idx, self.dim);
&mut self.items[idx]
}
}
#[cfg(feature = "std")]
impl<T> Index<Vec3u> for DimSlice<Box<[T]>> {
type Output = T;
fn index(&self, idx: Vec3u) -> &Self::Output {
let idx = index(idx, self.dim);
&self.items[idx]
}
}
#[cfg(feature = "std")]
impl<T> IndexMut<Vec3u> for DimSlice<Box<[T]>> {
fn index_mut(&mut self, idx: Vec3u) -> &mut Self::Output {
let idx = index(idx, self.dim);
&mut self.items[idx]
}
}
impl<T: IntoIterator> IntoIterator for DimSlice<T> {
type Item = T::Item;
type IntoIter = T::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.items.into_iter()
}
}
pub struct DimIter {
// fields are unused if `iter` feature is disabled
#[allow(unused)]
idx: Vec3u,
#[allow(unused)]
dim: Vec3u,
}
impl DimIter {
fn new(dim: Vec3u) -> Self {
Self { idx: Vec3u::default(), dim }
}
}
#[cfg(feature = "iter")]
impl Iterator for DimIter {
type Item = Vec3u;
fn next(&mut self) -> Option<Self::Item> {
if self.dim.x() == 0 || self.dim.y() == 0 || self.dim.z() == 0 {
// no items
return None;
}
if self.idx.z() == self.dim.z() {
// reached the end
return None;
}
let cur = self.idx;
self.idx = match cur.x()+1 {
// need to increment y
next_x if next_x == self.dim.x() => match cur.y() + 1 {
// need to increment z
next_y if next_y == self.dim.y() => Vec3u::new(0, 0, cur.z() + 1),
next_y => Vec3u::new(0, next_y, cur.z()),
},
next_x => Vec3u::new(next_x, cur.y(), cur.z()),
};
Some(cur)
}
}
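The advance logic in `DimIter::next` works like an odometer: x increments fastest, wrapping into y, which in turn wraps into z. A standalone sketch of just that step, with tuples standing in for `Vec3u` and an illustrative `advance` function that is not part of the crate:

```rust
// odometer-style advance: x wraps into y, y wraps into z
fn advance(cur: (u32, u32, u32), dim: (u32, u32, u32)) -> (u32, u32, u32) {
    let (x, y, z) = cur;
    let (dx, dy, _dz) = dim;
    if x + 1 < dx {
        (x + 1, y, z)          // still room on the x axis
    } else if y + 1 < dy {
        (0, y + 1, z)          // x wrapped; bump y
    } else {
        (0, 0, z + 1)          // x and y wrapped; bump z
    }
}

fn main() {
    let dim = (2, 2, 2);
    assert_eq!(advance((0, 0, 0), dim), (1, 0, 0));
    assert_eq!(advance((1, 0, 0), dim), (0, 1, 0)); // x wrapped
    assert_eq!(advance((1, 1, 0), dim), (0, 0, 1)); // x and y wrapped
}
```

Once z reaches `dim.z`, the iterator above reports the end; the separate up-front check for any zero dimension prevents an empty volume from yielding `(0, 0, 0)`.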
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_index() {
let dim = Vec3u::new(2, 3, 7);
assert_eq!(index(Vec3u::new(0, 0, 0), dim), 0);
assert_eq!(index(Vec3u::new(1, 0, 0), dim), 1);
assert_eq!(index(Vec3u::new(0, 1, 0), dim), 2);
assert_eq!(index(Vec3u::new(1, 1, 0), dim), 3);
assert_eq!(index(Vec3u::new(0, 2, 0), dim), 4);
assert_eq!(index(Vec3u::new(0, 0, 1), dim), 6);
assert_eq!(index(Vec3u::new(1, 0, 1), dim), 7);
assert_eq!(index(Vec3u::new(0, 1, 1), dim), 8);
assert_eq!(index(Vec3u::new(1, 2, 1), dim), 11);
assert_eq!(index(Vec3u::new(1, 2, 2), dim), 17);
}
#[test]
fn as_ref() {
let data = [1, 2];
let s = DimSlice::new(Vec3u::new(1, 2, 1), &data[..]);
assert_eq!(s.as_ref(), DimSlice::new(Vec3u::new(1, 2, 1), &data[..]));
}
#[test]
fn dim_slice_index() {
let data = [
0, 1, 2,
3, 4, 5,
0, 10,20,
30,40,50,
];
let s = DimSlice::new(Vec3u::new(3, 2, 2), &data);
assert_eq!(s[Vec3u::new(0, 0, 0)], 0);
assert_eq!(s[Vec3u::new(1, 0, 0)], 1);
assert_eq!(s[Vec3u::new(1, 1, 0)], 4);
assert_eq!(s[Vec3u::new(1, 1, 1)], 40);
assert_eq!(s[Vec3u::new(2, 1, 1)], 50);
}
#[test]
fn dim_slice_index_mut() {
let mut data = [
0, 1, 2,
3, 4, 5,
0, 10,20,
30,40,50,
];
let mut s = DimSlice::new(Vec3u::new(3, 2, 2), &mut data);
s[Vec3u::new(0, 0, 0)] = 100;
s[Vec3u::new(0, 1, 1)] = 300;
assert_eq!(data, [
100,1, 2,
3, 4, 5,
0, 10, 20,
300,40,50,
]);
}
#[test]
fn dim_slice_into_iter() {
let data = [1, 2, 3, 4, 5, 6];
let s = DimSlice::new(Vec3u::new(3, 1, 2), &data);
let mut i = s.into_iter();
assert_eq!(*i.next().unwrap(), 1);
assert_eq!(*i.next().unwrap(), 2);
assert_eq!(*i.next().unwrap(), 3);
assert_eq!(*i.next().unwrap(), 4);
assert_eq!(*i.next().unwrap(), 5);
assert_eq!(*i.next().unwrap(), 6);
assert_eq!(i.next(), None);
}
#[test]
fn dim_slice_into_iter_mut() {
let mut data = [1, 2, 3, 4, 5, 6];
let s = DimSlice::new(Vec3u::new(3, 1, 2), &mut data);
let mut i = s.into_iter();
*i.next().unwrap() = 10;
assert_eq!(*i.next().unwrap(), 2);
*i.next().unwrap() += 27;
assert_eq!(*i.next().unwrap(), 4);
*i.next().unwrap() *= 10;
assert_eq!(*i.next().unwrap(), 6);
assert_eq!(i.next(), None);
assert_eq!(data, [10,2,30,4,50,6]);
}
#[test]
fn dim_slice_indices() {
let s = DimSlice::new(Vec3u::new(4, 3, 2), &[()]);
let mut i = s.indices();
assert_eq!(i.next().unwrap(), Vec3u::new(0, 0, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(1, 0, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(2, 0, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(3, 0, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(0, 1, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(1, 1, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(2, 1, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(3, 1, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(0, 2, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(1, 2, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(2, 2, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(3, 2, 0));
assert_eq!(i.next().unwrap(), Vec3u::new(0, 0, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(1, 0, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(2, 0, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(3, 0, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(0, 1, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(1, 1, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(2, 1, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(3, 1, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(0, 2, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(1, 2, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(2, 2, 1));
assert_eq!(i.next().unwrap(), Vec3u::new(3, 2, 1));
assert_eq!(i.next(), None);
assert_eq!(i.next(), None);
}
#[test]
fn dim_slice_indices_zero_dim() {
let s = DimSlice::new(Vec3u::new(4, 3, 0), &[()]);
assert_eq!(s.indices().next(), None);
assert_eq!(s.indices().next(), None);
let s = DimSlice::new(Vec3u::new(4, 0, 2), &[()]);
assert_eq!(s.indices().next(), None);
assert_eq!(s.indices().next(), None);
let s = DimSlice::new(Vec3u::new(0, 3, 2), &[()]);
assert_eq!(s.indices().next(), None);
assert_eq!(s.indices().next(), None);
let s = DimSlice::new(Vec3u::new(3, 0, 0), &[()]);
assert_eq!(s.indices().next(), None);
assert_eq!(s.indices().next(), None);
let s = DimSlice::new(Vec3u::new(0, 1, 0), &[()]);
assert_eq!(s.indices().next(), None);
assert_eq!(s.indices().next(), None);
let s = DimSlice::new(Vec3u::new(0, 0, 2), &[()]);
assert_eq!(s.indices().next(), None);
assert_eq!(s.indices().next(), None);
}
#[test]
fn dim_slice_enumerated() {
let data = [
10, 11,
20, 21,
30, 31,
];
let s = DimSlice::new(Vec3u::new(1, 2, 3), &data);
let mut i = s.enumerated();
assert_eq!(i.next().unwrap(), (Vec3u::new(0, 0, 0), &10));
assert_eq!(i.next().unwrap(), (Vec3u::new(0, 1, 0), &11));
assert_eq!(i.next().unwrap(), (Vec3u::new(0, 0, 1), &20));
assert_eq!(i.next().unwrap(), (Vec3u::new(0, 1, 1), &21));
assert_eq!(i.next().unwrap(), (Vec3u::new(0, 0, 2), &30));
assert_eq!(i.next().unwrap(), (Vec3u::new(0, 1, 2), &31));
assert_eq!(i.next(), None);
}
#[test]
fn dim_slice_enumerated_mut() {
let mut data = [
10, 11,
20, 21,
30, 31,
];
let s = DimSlice::new(Vec3u::new(2, 1, 3), &mut data);
let mut i = s.enumerated();
let (idx, v) = i.next().unwrap();
assert_eq!(idx, Vec3u::new(0, 0, 0));
*v = 100;
let (idx, v) = i.next().unwrap();
assert_eq!(idx, Vec3u::new(1, 0, 0));
*v = 110;
i.next().unwrap();
let (idx, v) = i.next().unwrap();
assert_eq!(idx, Vec3u::new(1, 0, 1));
*v = 210;
assert_eq!(data, [100, 110, 20, 210, 30, 31]);
}
}


@@ -0,0 +1,7 @@
mod dim_slice;
mod offset_dim_slice;
pub use dim_slice::{
DimSlice,
DimIter,
};
pub use offset_dim_slice::OffsetDimSlice;


@@ -0,0 +1,227 @@
use core::convert::{AsMut, AsRef};
#[cfg(feature = "iter")]
use core::iter::Zip;
use core::ops::{Index, IndexMut};
use crate::dim::{DimIter, DimSlice};
use crate::vec::Vec3u;
#[cfg_attr(feature = "fmt", derive(Debug))]
#[derive(Clone, Default, PartialEq)]
pub struct OffsetDimSlice<T> {
offset: Vec3u,
inner: DimSlice<T>,
}
impl<T> OffsetDimSlice<T> {
pub fn new(offset: Vec3u, dim: Vec3u, items: T) -> Self {
Self { offset, inner: DimSlice::new(dim, items) }
}
pub fn dim(&self) -> Vec3u {
self.inner.dim()
}
pub fn offset(&self) -> Vec3u {
self.offset
}
pub fn into_inner(self) -> T {
self.inner.into_inner()
}
pub fn indices(&self) -> OffsetDimIter {
OffsetDimIter::new(self.offset, self.inner.indices())
}
/// re-borrow the slice with a different lifetime.
pub fn as_ref<R: ?Sized>(&self) -> OffsetDimSlice<&R>
where T: AsRef<R>
{
OffsetDimSlice { offset: self.offset, inner: self.inner.as_ref() }
}
/// re-borrow the slice with a different lifetime.
pub fn as_mut<R: ?Sized>(&mut self) -> OffsetDimSlice<&mut R>
where T: AsMut<R>
{
OffsetDimSlice { offset: self.offset, inner: self.inner.as_mut() }
}
}
#[cfg(feature = "iter")]
impl<T: IntoIterator> OffsetDimSlice<T> {
pub fn enumerated(self) -> Zip<OffsetDimIter, T::IntoIter> {
self.indices().zip(self.into_iter())
}
}
impl<T> Index<Vec3u> for OffsetDimSlice<T>
where
DimSlice<T>: Index<Vec3u>
{
type Output = <DimSlice<T> as Index<Vec3u>>::Output;
fn index(&self, idx: Vec3u) -> &Self::Output {
&self.inner[idx - self.offset]
}
}
impl<T> IndexMut<Vec3u> for OffsetDimSlice<T>
where
DimSlice<T>: IndexMut<Vec3u>
{
fn index_mut(&mut self, idx: Vec3u) -> &mut Self::Output {
&mut self.inner[idx - self.offset]
}
}
impl<T: IntoIterator> IntoIterator for OffsetDimSlice<T> {
type Item = T::Item;
type IntoIter = T::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.inner.into_iter()
}
}
pub struct OffsetDimIter {
// fields are unused if `iter` feature is disabled
#[allow(unused)]
offset: Vec3u,
#[allow(unused)]
inner: DimIter,
}
impl OffsetDimIter {
fn new(offset: Vec3u, inner: DimIter) -> Self {
Self { offset, inner }
}
}
#[cfg(feature = "iter")]
impl Iterator for OffsetDimIter {
type Item = Vec3u;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(|i| i + self.offset)
}
}
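An `OffsetDimSlice` lookup is thus a two-step translation: subtract `offset` to land in the inner `DimSlice`'s coordinate space, then linearize row-major. A minimal sketch with illustrative names (`world_to_local` and `flat_index` are not the crate's API):

```rust
// step 1: shift a world coordinate into the inner slice's space.
// note: with u32 components, indexing below `offset` underflows
// (a panic in debug builds), mirroring `idx - self.offset` above.
fn world_to_local(world: (u32, u32, u32), offset: (u32, u32, u32)) -> (u32, u32, u32) {
    (world.0 - offset.0, world.1 - offset.1, world.2 - offset.2)
}

// step 2: row-major linearization, same formula as DimSlice's `index`
fn flat_index(local: (u32, u32, u32), dim: (u32, u32, u32)) -> usize {
    let (x, y, z) = local;
    ((z * dim.1 + y) * dim.0 + x) as usize
}

fn main() {
    let (offset, dim) = ((1, 2, 3), (3, 2, 2));
    // world (2, 3, 4) -> local (1, 1, 1) -> flat (1*2 + 1)*3 + 1 = 10
    assert_eq!(world_to_local((2, 3, 4), offset), (1, 1, 1));
    assert_eq!(flat_index((1, 1, 1), dim), 10);
}
```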
#[cfg(test)]
mod test {
use super::*;
#[test]
fn offset_dim_slice_index() {
let data = [
0, 1, 2,
3, 4, 5,
0, 10,20,
30,40,50,
];
let s = OffsetDimSlice::new(Vec3u::new(1, 2, 3), Vec3u::new(3, 2, 2), &data);
assert_eq!(s[Vec3u::new(1, 2, 3)], 0);
assert_eq!(s[Vec3u::new(2, 2, 3)], 1);
assert_eq!(s[Vec3u::new(2, 3, 3)], 4);
assert_eq!(s[Vec3u::new(2, 3, 4)], 40);
assert_eq!(s[Vec3u::new(3, 3, 4)], 50);
}
#[test]
fn offset_dim_slice_index_mut() {
let mut data = [
0, 1, 2,
3, 4, 5,
0, 10,20,
30,40,50,
];
let mut s = OffsetDimSlice::new(Vec3u::new(1, 2, 3), Vec3u::new(3, 2, 2), &mut data);
s[Vec3u::new(1, 2, 3)] = 100;
s[Vec3u::new(1, 3, 4)] = 300;
assert_eq!(data, [
100,1, 2,
3, 4, 5,
0, 10, 20,
300,40,50,
]);
}
#[test]
fn offset_dim_slice_into_iter() {
let data = [1, 2, 3, 4];
let s = OffsetDimSlice::new(Vec3u::new(1, 2, 3), Vec3u::new(2, 1, 2), &data);
let mut i = s.into_iter();
assert_eq!(*i.next().unwrap(), 1);
assert_eq!(*i.next().unwrap(), 2);
assert_eq!(*i.next().unwrap(), 3);
assert_eq!(*i.next().unwrap(), 4);
assert_eq!(i.next(), None);
}
#[test]
fn offset_dim_slice_into_iter_mut() {
let mut data = [1, 2, 3, 4];
let s = OffsetDimSlice::new(Vec3u::new(1, 2, 3), Vec3u::new(2, 1, 2), &mut data);
let mut i = s.into_iter();
*i.next().unwrap() = 10;
assert_eq!(*i.next().unwrap(), 2);
*i.next().unwrap() += 27;
assert_eq!(*i.next().unwrap(), 4);
assert_eq!(i.next(), None);
assert_eq!(data, [10,2,30,4]);
}
#[test]
fn offset_dim_slice_indices() {
let s = OffsetDimSlice::new(Vec3u::new(10, 20, 30), Vec3u::new(2, 1, 2), &[()]);
let mut i = s.indices();
assert_eq!(i.next().unwrap(), Vec3u::new(10, 20, 30));
assert_eq!(i.next().unwrap(), Vec3u::new(11, 20, 30));
assert_eq!(i.next().unwrap(), Vec3u::new(10, 20, 31));
assert_eq!(i.next().unwrap(), Vec3u::new(11, 20, 31));
assert_eq!(i.next(), None);
}
#[test]
fn offset_dim_slice_enumerated() {
let data = [
10, 11,
20, 21,
30, 31,
];
let s = OffsetDimSlice::new(Vec3u::new(10, 20, 30), Vec3u::new(1, 2, 2), &data);
let mut i = s.enumerated();
assert_eq!(i.next().unwrap(), (Vec3u::new(10, 20, 30), &10));
assert_eq!(i.next().unwrap(), (Vec3u::new(10, 21, 30), &11));
assert_eq!(i.next().unwrap(), (Vec3u::new(10, 20, 31), &20));
assert_eq!(i.next().unwrap(), (Vec3u::new(10, 21, 31), &21));
assert_eq!(i.next(), None);
}
#[test]
fn offset_dim_slice_enumerated_mut() {
let mut data = [
10, 11,
20, 21,
];
let s = OffsetDimSlice::new(Vec3u::new(10, 20, 30), Vec3u::new(2, 1, 2), &mut data);
let mut i = s.enumerated();
let (idx, v) = i.next().unwrap();
assert_eq!(idx, Vec3u::new(10, 20, 30));
*v = 100;
i.next().unwrap();
i.next().unwrap();
let (idx, v) = i.next().unwrap();
assert_eq!(idx, Vec3u::new(11, 20, 31));
*v = 210;
assert_eq!(i.next(), None);
assert_eq!(data, [100, 11, 20, 210]);
}
}

crates/cross/src/lib.rs

@@ -0,0 +1,11 @@
#![feature(core_intrinsics)]
#![cfg_attr(not(feature = "std"), no_std)]
pub mod compound;
pub mod dim;
pub mod mat;
pub mod real;
pub mod step;
pub mod vec;
// private because `vec` re-exports the important `vecu` constructs
mod vecu;

Some files were not shown because too many files have changed in this diff.