README: fix up stale paths, material references
This commit is contained in:
137
README.md
137
README.md
@@ -5,7 +5,7 @@ to model the evolution of some 3d (or 2d) grid-volume of space over time. simula
|
|||||||
|
|
||||||
- some material at each position in the grid
|
- some material at each position in the grid
|
||||||
- a set of stimuli to apply at specific regions in the volume over time
|
- a set of stimuli to apply at specific regions in the volume over time
|
||||||
- a set of "measurements" to evaluate and record as the simulation evolves.
|
- a set of "measurements" to evaluate and record as the simulation evolves
|
||||||
- an optional state file to allow pausing/resumption of long-run simulations
|
- an optional state file to allow pausing/resumption of long-run simulations
|
||||||
|
|
||||||
after this the simulation is advanced in steps up to some user-specified moment in time.
|
after this the simulation is advanced in steps up to some user-specified moment in time.
|
||||||
@@ -19,8 +19,20 @@ examples are in the [crates/applications/](crates/applications/) directory.
|
|||||||
here's an excerpt from the [wavefront](crates/applications/wavefront/src/main.rs) example:
|
here's an excerpt from the [wavefront](crates/applications/wavefront/src/main.rs) example:
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
// Create the simulation "driver" which uses the CPU as backend.
|
// use a general-purpose material, capable of representing vacuum, conductors, and magnetic materials.
|
||||||
let mut driver: driver::CpuDriver = driver::Driver::new(size, feature_size);
|
type Mat = mat::FullyGenericMaterial<f32>;
|
||||||
|
|
||||||
|
// simulate a volume of 401x401x1 discrete grid cells.
|
||||||
|
let (width, height, depth) = (401, 401, 1);
|
||||||
|
let size = Index::new(width, height, depth);
|
||||||
|
// each cell represents 1um x 1um x 1um volume.
|
||||||
|
let feature_size = 1e-6;
|
||||||
|
|
||||||
|
// create the simulation "driver".
|
||||||
|
// the first parameter is the float type to use: f32 for unchecked math, coremem::real::R32
|
||||||
|
// to guard against NaN/Inf (useful for debugging).
|
||||||
|
// to run this on the gpu instead of the cpu, replace `CpuBackend` with `WgpuBackend`.
|
||||||
|
let mut driver = Driver::new(SpirvSim::<f32, Mat, spirv::CpuBackend>::new(size, feature_size));
|
||||||
|
|
||||||
// create a conductor on the left side.
|
// create a conductor on the left side.
|
||||||
let conductor = Cube::new(
|
let conductor = Cube::new(
|
||||||
@@ -34,17 +46,18 @@ let center_region = Cube::new(
|
|||||||
Index::new(200, height/4, 0).to_meters(feature_size),
|
Index::new(200, height/4, 0).to_meters(feature_size),
|
||||||
Index::new(201, height*3/4, 1).to_meters(feature_size),
|
Index::new(201, height*3/4, 1).to_meters(feature_size),
|
||||||
);
|
);
|
||||||
|
|
||||||
// emit a constant E/H delta over this region for 100 femtoseconds
|
// emit a constant E/H delta over this region for 100 femtoseconds
|
||||||
let stim = Stimulus::new(
|
let stim = ModulatedVectorField::new(
|
||||||
center_region,
|
RegionGated::new(center_region, Fields::new_eh(
|
||||||
UniformStimulus::new(
|
Vec3::new(2e19, 0.0, 0.0),
|
||||||
Vec3::new(2e19, 0.0, 0.0), // E field (per second)
|
Vec3::new(0.0, 0.0, 2e19/376.730),
|
||||||
Vec3::new(0.0, 0.0, 2e19/376.730) // H field (per second)
|
)),
|
||||||
).gated(0.0, 100e-15),
|
Pulse::new(0.0, 100e-15),
|
||||||
);
|
);
|
||||||
driver.add_stimulus(stim);
|
driver.add_stimulus(stim);
|
||||||
|
|
||||||
// finally, run the simulation:
|
// finally, run the simulation through t=100ps
|
||||||
driver.step_until(Seconds(100e-12));
|
driver.step_until(Seconds(100e-12));
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -74,8 +87,8 @@ $ rustup component add rust-src rustc-dev llvm-tools-preview
|
|||||||
|
|
||||||
now you can swap out the `CpuDriver` with a `SpirvDriver` and you're set:
|
now you can swap out the `CpuBackend` with a `WgpuBackend` and you're set:
|
||||||
```diff
|
```diff
|
||||||
- let mut driver: driver::CpuDriver = driver::Driver::new(size, feature_size);
|
- let mut driver = Driver::new(SpirvSim::<f32, Mat, spirv::CpuBackend>::new(size, feature_size));
|
||||||
+ let mut driver: driver::SpirvDriver = driver::Driver::new_spirv(size, feature_size);
|
+ let mut driver = Driver::new(SpirvSim::<f32, Mat, spirv::WgpuBackend>::new(size, feature_size));
|
||||||
```
|
```
|
||||||
|
|
||||||
re-run it as before and you should see the same results:
|
re-run it as before and you should see the same results:
|
||||||
@@ -90,7 +103,7 @@ see the "Processing Loop" section below to understand what GPU acceleration enta
|
|||||||
|
|
||||||
the [sr\_latch](crates/applications/sr_latch/src/main.rs) example explores a more interesting feature set.
|
the [sr\_latch](crates/applications/sr_latch/src/main.rs) example explores a more interesting feature set.
|
||||||
first, it "measures" a bunch of parameters over different regions of the simulation
|
first, it "measures" a bunch of parameters over different regions of the simulation
|
||||||
(peak inside [`src/meas.rs`](crates/coremem/src/meas.rs) to see how these each work):
|
(peek inside [`crates/coremem/src/meas.rs`](crates/coremem/src/meas.rs) to see how these each work):
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
// measure a bunch of items of interest throughout the whole simulation duration:
|
// measure a bunch of items of interest throughout the whole simulation duration:
|
||||||
@@ -121,7 +134,7 @@ allowing you to dig further into the simulation in an _interactive_ way (versus
|
|||||||
renderer used in the `wavefront` example):
|
renderer used in the `wavefront` example):
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
// serialize frames for later viewing with `cargo run -p coremem_post --release --bin viewer`
|
// serialize frames for later viewing with `cargo run --release --bin viewer`
|
||||||
driver.add_serializer_renderer(&*format!("{}frame-", prefix), 36000, None);
|
driver.add_serializer_renderer(&*format!("{}frame-", prefix), 36000, None);
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -134,7 +147,7 @@ $ cargo run --release --example sr_latch
|
|||||||
and then investigate the results with
|
and then investigate the results with
|
||||||
|
|
||||||
```
|
```
|
||||||
$ cargo run -p coremem_post --bin viewer ./out/applications/sr_latch
|
$ cargo run --release --bin viewer ./out/applications/sr_latch
|
||||||
```
|
```
|
||||||

|

|
||||||
|
|
||||||
@@ -145,20 +158,21 @@ the light blue splotches depict the conductors (in the center, the wire coupling
|
|||||||
|
|
||||||
what we see here is that both ferrites (the two large circles in the above image) have a clockwise polarized B field. this is in the middle of a transition, so the E fields look a bit chaotic. advance to t=46 ns: the "reset" pulse was applied at t=24ns and had 22ns to settle:
|
what we see here is that both ferrites (the two large circles in the above image) have a clockwise polarized B field. this is in the middle of a transition, so the E fields look a bit chaotic. advance to t=46 ns: the "reset" pulse was applied at t=24ns and had 22ns to settle:
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
we can see the "reset" pulse has polarized both ferrites in the CCW orientation this time. the E field is less pronounced because we gave the system 22ns instead of 3ns to settle this time.
|
we can see the "reset" pulse has polarized both ferrites in the counter-clockwise orientation this time. the E field is less pronounced because we gave the system 22ns instead of 3ns to settle this time.
|
||||||
|
|
||||||
the graphical viewer is helpful for debugging geometries, but the CSV measurements are useful for viewing numeric system performance. peak inside "out/applications/sr-latch/meas.csv" to see a bunch of measurements over time. you can use a tool like Excel or [visidata](https://www.visidata.org/) to plot the interesting ones.
|
the graphical viewer is helpful for debugging geometries, but the CSV measurements are useful for viewing numeric system performance. peek inside "out/applications/sr_latch/meas.csv" to see a bunch of measurements over time. you can use a tool like Excel or [visidata](https://www.visidata.org/) to plot the interesting ones.
|
||||||
|
|
||||||
here's a plot of `M(mem2)` over time from the SR latch simulation. we're measuring the (average) M value along the major tangent to the torus corresponding to the ferrite on the right in the images above. the notable bumps correspond to these pulses: "set", "reset", "set", "reset", "set+reset applied simultaneously", "set", "set".
|
here's a plot of `M(mem2)` over time from the SR latch simulation. we're measuring, over the torus volume corresponding to the ferrite on the right in the images above, the (average) M component normal to each given cross section of the torus. the notable bumps correspond to these pulses: "set", "reset", "set", "reset", "set+reset applied simultaneously", "set", "set".
|
||||||
|
|
||||||
|
|
||||||
 over time")
|
 over time")
|
||||||
|
|
||||||
|
|
||||||
## Processing Loop (and how GPU acceleration works)
|
## Processing Loop (and how GPU acceleration works)
|
||||||
|
|
||||||
the processing loop for a simulation is roughly as follows ([`src/driver.rs:step_until`](crates/coremem/src/driver.rs) drives this loop):
|
the processing loop for a simulation is roughly as follows ([`crates/coremem/src/driver.rs:step_until`](crates/coremem/src/driver.rs) drives this loop):
|
||||||
1. evaluate all stimuli at the present moment in time; these produce an "externally applied" E and H field
|
1. evaluate all stimuli at the present moment in time; these produce an "externally applied" E and H field
|
||||||
across the entire volume.
|
across the entire volume.
|
||||||
2. apply the FDTD update equations to "step" the E field, and then "step" the H field. these equations take the external stimulus from step 1 into account.
|
2. apply the FDTD update equations to "step" the E field, and then "step" the H field. these equations take the external stimulus from step 1 into account.
|
||||||
@@ -167,13 +181,15 @@ the processing loop for a simulation is roughly as follows ([`src/driver.rs:step
|
|||||||
|
|
||||||
within each step above, the logic is multi-threaded and the rendeveous points lie at the step boundaries.
|
within each step above, the logic is multi-threaded and the rendezvous points lie at the step boundaries.
|
||||||
|
|
||||||
it turns out that the Courant rules force us to evaluate FDTD updates (step 2) on a _far_ smaller time scale than the other steps are sensitive to. so to tune for performance, we apply some optimizations here universally:
|
it turns out that the Courant rules force us to evaluate FDTD updates (step 2) on a _far_ smaller time scale than the other steps are sensitive to. so to tune for performance, we apply some optimizations:
|
||||||
- stimuli (step 1) are evaluated only once every N frames (tunable). we still *apply* them on each frame individually. the waveform resembles that of a Sample & Hold circuit.
|
- stimuli (step 1) are evaluated only once every N frames. we still *apply* them on each frame individually. the waveform resembles that of a Sample & Hold circuit.
|
||||||
- measurement functions (step 3) are triggered only once every M frames.
|
- measurement functions (step 3) are triggered only once every M frames.
|
||||||
- the state is serialized (step 4) only once every Z frames.
|
- the state is serialized (step 4) only once every Z frames.
|
||||||
|
|
||||||
|
`N`, `M`, and `Z` are all tunable by the application.
|
||||||
|
|
||||||
as a result, step 2 is actually able to apply the FDTD update functions not just once but up to `min(N, M, Z)` times.
|
as a result, step 2 is actually able to apply the FDTD update functions not just once but up to `min(N, M, Z)` times.
|
||||||
although steps 1 and 3 vary heavily based on the user configuration of the simulation, step 2 can be defined pretty narrowly in code (no user-callbacks/dynamic function calls/etc). this lets us offload the processing of step 2 to a dedicated GPU. by tuning N/M/Z, step 2 becomes the dominant cost in our simulations an GPU offloading can trivially boost performance by more than an order of magnitude on even a mid-range consumer GPU.
|
although steps 1 and 3 vary heavily based on the user configuration of the simulation, step 2 can be defined pretty narrowly in code (no user-callbacks/dynamic function calls/etc). this lets us offload the processing of step 2 to a dedicated GPU. by tuning N/M/Z, step 2 becomes the dominant cost in our simulations and GPU offloading can easily boost performance by more than an order of magnitude on even a mid-range consumer GPU.
|
||||||
|
|
||||||
# Features
|
# Features
|
||||||
|
|
||||||
@@ -183,76 +199,55 @@ this library takes effort to separate the following from the core/math-heavy "si
|
|||||||
- Measurements
|
- Measurements
|
||||||
- Render targets (video, CSV, etc)
|
- Render targets (video, CSV, etc)
|
||||||
- Materials (conductors, non-linear ferromagnets)
|
- Materials (conductors, non-linear ferromagnets)
|
||||||
- Float implementation (for CPU simulations only)
|
- Float implementation
|
||||||
|
|
||||||
the simulation only interacts with these things through a trait interface, such that they're each swappable.
|
the simulation only interacts with these things through a trait interface, such that they're each swappable.
|
||||||
|
|
||||||
common stimuli type live in [src/stim.rs](crates/coremem/src/stim.rs).
|
common stimuli types live in [crates/coremem/src/stim/](crates/coremem/src/stim/).
|
||||||
common measurements live in [src/meas.rs](crates/coremem/src/meas.rs).
|
common measurements live in [crates/coremem/src/meas.rs](crates/coremem/src/meas.rs).
|
||||||
common render targets live in [src/render.rs](crates/coremem/src/render.rs). these change infrequently enough that [src/driver.rs](crates/coremem/src/driver.rs) has some specialized helpers for each render backend.
|
common render targets live in [crates/coremem/src/render.rs](crates/coremem/src/render.rs). these change infrequently enough that [crates/coremem/src/driver.rs](crates/coremem/src/driver.rs) has some specialized helpers for each render backend.
|
||||||
common materials are spread throughout [src/mat](crates/coremem/src/mat/mod.rs).
|
common materials are spread throughout [crates/cross/src/mat/](crates/cross/src/mat/).
|
||||||
different float implementations live in [src/real.rs](crates/coremem/src/real.rs).
|
different float implementations live in [crates/cross/src/real.rs](crates/cross/src/real.rs).
|
||||||
if you're getting NaNs, you can run the entire simulation on a checked `R64` type in order to pinpoint the moment those are introduced.
|
if you're getting NaNs, you can run the entire simulation on a checked `R64` (CPU-only) or `R32` (any backend) type in order to pinpoint the moment those are introduced.
|
||||||
|
|
||||||
## Materials
|
## Materials
|
||||||
|
|
||||||
of these, the materials have the most "gotchas".
|
each cell is modeled as having a vector E, H and M field, as well as a Material type defined by the application.
|
||||||
each cell owns an associated material instance.
|
|
||||||
in the original CPU implementation of this library, each cell had a `E` and `H` component,
|
|
||||||
and any additional state was required to be held in the material. so a conductor material
|
|
||||||
might hold only some immutable `conductivity` parameter, while a ferromagnetic material
|
|
||||||
might hold similar immutable material parameters _and also a mutable `M` field_.
|
|
||||||
|
|
||||||
spirv/rust-gpu requires stronger separation of state, and so this `M` field had to be lifted
|
the `Material` trait has the following methods (both are optional):
|
||||||
completely out of the material. as a result, the material API differs slightly between the CPU
|
```
|
||||||
and spirv backends. as you saw in the examples, that difference doesn't have to appear at the user
|
pub trait Material<R: Real>: Sized {
|
||||||
level, but you will see it if you're adding new materials.
|
fn conductivity(&self) -> Vec3<R>;
|
||||||
|
/// returns the new M vector for this material. called during each `step_h`.
|
||||||
|
fn move_b_vec(&self, m: Vec3<R>, target_b: Vec3<R>) -> Vec3<R>;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
### Spirv Materials
|
to add a new material:
|
||||||
|
- for `CpuBackend` simulations: just implement this trait on your own type and instantiate a `SpirvSim` specialized over that material instead of `FullyGenericMaterial`.
|
||||||
all the materials usable in the spirv backend live in [`crates/spirv_backend/src/mat.rs`](crates/spirv_backend/src/mat.rs).
|
- for `WgpuBackend` simulations, do the above and add a spirv entry-point specialized to your material. scroll to the bottom of `crates/spirv_backend/src/lib.rs` and follow the examples.
|
||||||
to add a new one, implement the `Material` trait in that file on some new type, which must also
|
|
||||||
be in that file.
|
|
||||||
|
|
||||||
next, add an analog type somewhere in the main library, like [`src/mat/mh_ferromagnet.rs`](crates/coremem/src/mat/mh_ferromagnet.rs). this will
|
|
||||||
be the user-facing material.
|
|
||||||
now implement the `IntoFfi` and `IntoLib` traits for this new material inside [`src/sim/spirv/bindings.rs`](crates/coremem/src/sim/spirv/bindings.rs)
|
|
||||||
so that the spirv backend can translate between its GPU-side material and your CPU-side/user-facing material.
|
|
||||||
|
|
||||||
finally, because cpu-side `SpirvSim<M>` is parameterized over a material, but the underlying spirv library
|
|
||||||
is compiled separately, the spirv library needs specialized dispatch logic for each value of `M` you might want
|
|
||||||
to use. add this to [`crates/spirv_backend/src/lib.rs`](crates/spirv_backend/src/lib.rs) (it's about five lines: follow the example of `Iso3R1`).
|
|
||||||
|
|
||||||
|
|
||||||
### CPU Materials
|
|
||||||
|
|
||||||
adding a CPU material is "simpler". just implement the `Material` trait in [`src/mat/mod.rs`](crates/coremem/src/mat/mod.rs).
|
|
||||||
either link that material into the `GenericMaterial` type in the same file (if you want to easily
|
|
||||||
mix materials within the same simulation), or if that material can handle every cell in your
|
|
||||||
simulation then instantiance a `SimState<M>` object which is directly parameterized over your material.
|
|
||||||
|
|
||||||
|
as can be seen, the Material trait is fairly restrictive. its methods are immutable, and it doesn't even have access to the entire cell state (only the cell's M value, during `move_b_vec`). i'd be receptive to a PR or request that exposes more cell state or mutability: this is just an artifact of me tailoring this specifically to the class of materials i intended to use it for.
|
||||||
|
|
||||||
## What's in the Box
|
## What's in the Box
|
||||||
|
|
||||||
this library ships with the following materials:
|
this library ships with the following materials:
|
||||||
- conductors (Isomorphic or Anisomorphic). supports CPU or GPU.
|
- conductors (Isomorphic or Anisomorphic).
|
||||||
- linear magnets (defined by their relative permeability, mu\_r). supports CPU only.
|
|
||||||
- a handful of ferromagnet implementations:
|
- a handful of ferromagnet implementations:
|
||||||
- `MHPgram` specifies the `M(H)` function as a parallelogram. supports CPU or GPU.
|
- `MHPgram` specifies the `M(H)` function as a parallelogram.
|
||||||
- `MBPgram` specifies the `M(B)` function as a parallelogram. supports CPU or GPU.
|
- `MBPgram` specifies the `M(B)` function as a parallelogram.
|
||||||
- `MHCurve` specifies the `M(H)` function as an arbitrary polygon. requires a new type for each curve for memory reasons (see `Ferroxcube3R1`). supports CPU only.
|
|
||||||
|
|
||||||
measurements include ([src/meas.rs](crates/coremem/src/meas.rs)):
|
measurements include ([crates/coremem/src/meas.rs](crates/coremem/src/meas.rs)):
|
||||||
- E, B or H field (mean vector over some region)
|
- E, B or H field (mean vector over some region)
|
||||||
- energy, power (net over some region)
|
- energy, power (net over some region)
|
||||||
- current (mean vector over some region)
|
- current (mean vector over some region)
|
||||||
- mean current magnitude along a closed loop (toroidal loops only)
|
- mean current magnitude along a closed loop (toroidal loops only)
|
||||||
- mean magnetic polarization magnitude along a closed loop (toroidal loops only)
|
- mean magnetic polarization magnitude along a closed loop (toroidal loops only)
|
||||||
|
|
||||||
output targets include ([src/render.rs](crates/coremem/src/render.rs)):
|
output targets include ([crates/coremem/src/render.rs](crates/coremem/src/render.rs)):
|
||||||
- `ColorTermRenderer`: renders 2d-slices in real-time to the terminal.
|
- `ColorTermRenderer`: renders 2d-slices in real-time to the terminal.
|
||||||
- `Y4MRenderer`: outputs 2d-slices to an uncompressed `y4m` video file.
|
- `Y4MRenderer`: outputs 2d-slices to an uncompressed `y4m` video file.
|
||||||
- `SerializerRenderer`: dumps the full 3d simulation state to disk. parseable after the fact with [src/bin/viewer.rs](crates/post/src/bin/viewer.rs).
|
- `SerializerRenderer`: dumps the full 3d simulation state to disk. parseable after the fact with [crates/post/src/bin/viewer.rs](crates/post/src/bin/viewer.rs).
|
||||||
- `CsvRenderer`: dumps the output of all measurements into a `csv` file.
|
- `CsvRenderer`: dumps the output of all measurements into a `csv` file.
|
||||||
|
|
||||||
historically there was also a plotly renderer, but that effort was redirected into developing the viewer tool better.
|
historically there was also a plotly renderer, but that effort was redirected into developing the viewer tool better.
|
||||||
@@ -266,12 +261,12 @@ in a FDTD simulation, as we shrink the cell size the time step has to shrink too
|
|||||||
|
|
||||||
this is the "default" optimized version. you could introduce a new material to the simulation, and performance would remain constant. as you finalize your simulation, you can specialize it a bit and compile the GPU code to optimize for your specific material. this can squeeze another factor-of-2 gain: view [buffer\_proto5](crates/applications/buffer_proto5/src/main.rs) to see how that's done.
|
this is the "default" optimized version. you could introduce a new material to the simulation, and performance would remain constant. as you finalize your simulation, you can specialize it a bit and compile the GPU code to optimize for your specific material. this can squeeze another factor-of-2 gain: view [buffer\_proto5](crates/applications/buffer_proto5/src/main.rs) to see how that's done.
|
||||||
|
|
||||||
contrast that to the CPU-only implementation which achieves 24.6M grid cell steps per second: that's about a 34x gain.
|
contrast that to the CPU-only implementation which achieves 24.6M grid cell steps per second on my 12-core Ryzen 3900X: that's about a 34x gain.
|
||||||
|
|
||||||
|
|
||||||
# Support
|
# Support
|
||||||
|
|
||||||
the author can be reached on Matrix <@colin:uninsane.org> or Activity Pub <@colin@fed.uninsane.org>. i poured a lot of time into making
|
the author can be reached on Matrix <@colin:uninsane.org>, email <colin@uninsane.org> or Activity Pub <@colin@fed.uninsane.org>. i poured a lot of time into making
|
||||||
this: i'm happy to spend the marginal extra time to help curious people make use of what i've made, so don't hesitate to reach out.
|
this: i'm happy to spend the marginal extra time to help curious people make use of what i've made, so don't hesitate to reach out.
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user