diff --git a/.travis.yml b/.travis.yml
index 1cb2e68c..f3d76887 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,7 +1,5 @@
 language: rust
 sudo: false
-before_script:
-  - pip install 'travis-cargo<0.2' --user && export PATH=$HOME/.local/bin:$PATH
 
 matrix:
   include:
@@ -11,16 +9,21 @@ matrix:
       os: osx
     - rust: beta
     - rust: nightly
+
+    - rust: nightly
+      before_script:
+        - pip install 'travis-cargo<0.2' --user && export PATH=$HOME/.local/bin:$PATH
       script:
-        - cargo test
+        - cargo doc --no-deps --all-features
+        - cargo test --benches
         - cargo test --features nightly
-        - cargo test --manifest-path rand-derive/Cargo.toml
-        - cargo doc --no-deps --features nightly
+      after_success:
+        - travis-cargo --only nightly doc-upload
+
 script:
   - cargo test
   - cargo test --manifest-path rand-derive/Cargo.toml
-after_success:
-  - travis-cargo --only nightly doc-upload
+
 env:
   global:
     secure: "BdDntVHSompN+Qxz5Rz45VI4ZqhD72r6aPl166FADlnkIwS6N6FLWdqs51O7G5CpoMXEDvyYrjmRMZe/GYLIG9cmqmn/wUrWPO+PauGiIuG/D2dmfuUNvSTRcIe7UQLXrfP3yyfZPgqsH6pSnNEVopquQKy3KjzqepgriOJtbyY="
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..4898fc50
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,265 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
+and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
+
+## [0.4.2] - 2018-01-05
+### Changed
+- Use winapi on Windows
+- Update for Fuchsia OS
+- Remove dev-dependency on `log`
+
+## [0.4.1] - 2017-12-17
+### Added
+- `no_std` support
+
+## [0.4.0-pre.0] - 2017-12-11
+### Added
+- `JitterRng` added as a high-quality alternative entropy source using the
+  system timer
+- new `seq` module with `sample_iter`, `sample_slice`, etc.
+- WASM support via dummy implementations (fail at run-time)
+- Additional benchmarks, covering generators and new seq code
+
+### Changed
+- `thread_rng` uses `JitterRng` if seeding from system time fails
+  (slower but more secure than previous method)
+
+### Deprecated
+- `sample` function deprecated (replaced by `sample_iter`)
+
+## [0.3.18] - 2017-11-06
+### Changed
+- `thread_rng` is seeded from the system time if `OsRng` fails
+- `weak_rng` now uses `thread_rng` internally
+
+
+## [0.3.17] - 2017-10-07
+### Changed
+- Fuchsia: Magenta was renamed to Zircon
+
+## [0.3.16] - 2017-07-27
+### Added
+- Implement Debug for more non-public types
+- implement `Rand` for (i|u)i128
+- Support for Fuchsia
+
+### Changed
+- Add inline attribute to SampleRange::construct_range.
+  This improves the benchmark for sample by 11% and for shuffle by 16%.
+- Use `RtlGenRandom` instead of `CryptGenRandom`
+
+
+## [0.3.15] - 2016-11-26
+### Added
+- Add `Rng` trait method `choose_mut`
+- Redox support
+
+### Changed
+- Use `arc4rand` for `OsRng` on FreeBSD.
+- Use `arc4random(3)` for `OsRng` on OpenBSD.
+
+### Fixed
+- Fix filling buffers 4 GiB or larger with `OsRng::fill_bytes` on Windows
+
+
+## [0.3.14] - 2016-02-13
+### Fixed
+- Inline definitions from winapi/advapi32, which decreases build times
+
+
+## [0.3.13] - 2016-01-09
+### Fixed
+- Compatible with Rust 1.7.0-nightly (needed some extra type annotations)
+
+
+## [0.3.12] - 2015-11-09
+### Changed
+- Replaced the methods in `next_f32` and `next_f64` with the technique described
+  by Saito & Matsumoto at MCQMC'08. The new method should exhibit a slightly more
+  uniform distribution.
+- Depend on libc 0.2
+
+### Fixed
+- Fix iterator protocol issue in `rand::sample`
+
+
+## [0.3.11] - 2015-08-31
+### Added
+- Implement `Rand` for arrays with n <= 32
+
+
+## [0.3.10] - 2015-08-17
+### Added
+- Support for NaCl platforms
+
+### Changed
+- Allow `Rng` to be `?Sized`, impl for `&mut R` and `Box<R>` where `R: ?Sized + Rng`
+
+
+## [0.3.9] - 2015-06-18
+### Changed
+- Use `winapi` for Windows API things
+
+### Fixed
+- Fixed test on stable/nightly
+- Fix `getrandom` syscall number for aarch64-unknown-linux-gnu
+
+
+## [0.3.8] - 2015-04-23
+### Changed
+- `log` is a dev dependency
+
+### Fixed
+- Fix race condition of atomics in `is_getrandom_available`
+
+
+## [0.3.7] - 2015-04-03
+### Fixed
+- Derive Copy/Clone changes
+
+
+## [0.3.6] - 2015-04-02
+### Changed
+- Move to stable Rust!
+
+
+## [0.3.5] - 2015-04-01
+### Fixed
+- Compatible with Rust master
+
+
+## [0.3.4] - 2015-03-31
+### Added
+- Implement Clone for `Weighted`
+
+### Fixed
+- Compatible with Rust master
+
+
+## [0.3.3] - 2015-03-26
+### Fixed
+- Fix compile on Windows
+
+
+## [0.3.2] - 2015-03-26
+
+
+## [0.3.1] - 2015-03-26
+### Fixed
+- Fix compile on Windows
+
+
+## [0.3.0] - 2015-03-25
+### Changed
+- Update to use log version 0.3.x
+
+
+## [0.2.1] - 2015-03-22
+### Fixed
+- Compatible with Rust master
+- Fixed iOS compilation
+
+
+## [0.2.0] - 2015-03-06
+### Fixed
+- Compatible with Rust master (move from `old_io` to `std::io`)
+
+
+## [0.1.4] - 2015-03-04
+### Fixed
+- Compatible with Rust master (use wrapping ops)
+
+
+## [0.1.3] - 2015-02-20
+### Fixed
+- Compatible with Rust master
+
+### Removed
+- Removed Copy implementations from RNGs
+
+
+## [0.1.2] - 2015-02-03
+### Added
+- Imported functionality from `std::rand`, including:
+  - `StdRng`, `SeedableRng`, `ThreadRng`, `weak_rng()`
+  - `ReaderRng`: A wrapper around any Reader to treat it as an RNG.
+- Imported documentation from `std::rand`
+- Imported tests from `std::rand`
+
+
+## [0.1.1] - 2015-02-03
+### Added
+- Migrate to a cargo-compatible directory structure.
+
+### Fixed
+- Do not use entropy during `gen_weighted_bool(1)`
+
+
+## [Rust 0.12.0] - 2014-10-09
+### Added
+- Impl Rand for tuples of arity 11 and 12
+- Include ChaCha pseudorandom generator
+- Add `next_f64` and `next_f32` to Rng
+- Implement Clone for PRNGs
+
+### Changed
+- Rename `TaskRng` to `ThreadRng` and `task_rng` to `thread_rng` (since a
+  runtime is removed from Rust).
+
+### Fixed
+- Improved performance of ISAAC and ISAAC64 by 30% and 12% respectively, by
+  informing the optimiser that indexing is never out-of-bounds.
+
+### Removed
+- Removed the deprecated `choose_option`
+
+
+## [Rust 0.11.0] - 2014-07-02
+### Added
+- document when to use `OSRng` in cryptographic context, and explain why we use `/dev/urandom` instead of `/dev/random`
+- `Rng::gen_iter()` which will return an infinite stream of random values
+- `Rng::gen_ascii_chars()` which will return an infinite stream of random ascii characters
+
+### Changed
+- Now only depends on libcore!   2adf5363f88ffe06f6d2ea5c338d1b186d47f4a1
+- Remove `Rng.choose()`, rename `Rng.choose_option()` to `.choose()`
+- Rename OSRng to OsRng
+- The WeightedChoice structure is no longer built with a `Vec<Weighted<T>>`,
+  but rather a `&mut [Weighted<T>]`. This means that the WeightedChoice
+  structure now has a lifetime associated with it.
+- The `sample` method on `Rng` has been moved to a top-level function in the
+  `rand` module due to its dependence on `Vec`.
+
+### Removed
+- `Rng::gen_vec()` was removed. Previous behavior can be regained with
+  `rng.gen_iter().take(n).collect()`
+- `Rng::gen_ascii_str()` was removed. Previous behavior can be regained with
+  `rng.gen_ascii_chars().take(n).collect()`
+- {IsaacRng, Isaac64Rng, XorShiftRng}::new() have all been removed. These all
+  relied on being able to use an OSRng for seeding, but this is no longer
+  available in librand (where these types are defined). To retain the same
+  functionality, these types now implement the `Rand` trait so they can be
+  generated with a random seed from another random number generator. This allows
+  the stdlib to use an OSRng to create seeded instances of these RNGs.
+- Rand implementations for `Box<T>` and `@T` were removed. These seemed to be
+  pretty rare in the codebase, and it allows for librand to not depend on
+  liballoc.  Additionally, other pointer types like Rc<T> and Arc<T> were not
+  supported.
+- Remove a slew of old deprecated functions
+
+
+## [Rust 0.10] - 2014-04-03
+### Changed
+- replace `Rng.shuffle`'s functionality with `.shuffle_mut`
+- bubble up IO errors when creating an OSRng
+
+### Fixed
+- Use `fill()` instead of `read()`
+- Rewrite OsRng in Rust for windows
+
+## [0.10-pre] - 2014-03-02
+### Added
+- Separate `rand` out of the standard library
+
diff --git a/Cargo.toml b/Cargo.toml
index 0700436a..9551aadc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rand"
-version = "0.3.17"
+version = "0.4.2"
 authors = ["The Rust Project Developers"]
 license = "MIT/Apache-2.0"
 readme = "README.md"
@@ -14,17 +14,22 @@ keywords = ["random", "rng"]
 categories = ["algorithms"]
 
 [features]
-i128_support = []
-nightly = ["i128_support"]
+default = ["std"]
+nightly = ["i128_support"] # enables all features requiring nightly rust
 
-[dependencies]
-libc = "0.2"
+std = ["libc"] # default feature; without this rand uses libcore
+alloc = []  # enables Vec and Box support without std
 
-[dev-dependencies]
-log = "0.3.0"
+i128_support = [] # enables i128 and u128 support
+
+[target.'cfg(unix)'.dependencies]
+libc = { version = "0.2", optional = true }
+
+[target.'cfg(windows)'.dependencies]
+winapi = { version = "0.3", features = ["minwindef", "ntsecapi", "profileapi", "winnt"] }
 
 [workspace]
 members = ["rand-derive"]
 
 [target.'cfg(target_os = "fuchsia")'.dependencies]
-fuchsia-zircon = "^0.2.1"
+fuchsia-zircon = "0.3.2"
diff --git a/README.md b/README.md
index cd4ee249..2dd47a39 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ Add this to your `Cargo.toml`:
 
 ```toml
 [dependencies]
-rand = "0.3"
+rand = "0.4"
 ```
 
 and this to your crate root:
@@ -23,6 +23,19 @@ and this to your crate root:
 extern crate rand;
 ```
 
+### Versions
+
+The `rand` crate has been at version `0.3` since March 2015. If you wish to
+avoid all breaking changes you may wish to stick with this version.
+
+Version `0.4` was released in December 2017. It contains almost no breaking
+changes since the `0.3` series, but nevertheless contains some significant
+new code, including a new "external" entropy source (`JitterRng`) and `no_std`
+support.
+
+Version `0.5` is in development and contains significant performance
+improvements for the ISAAC random number generators.
+
 ## Examples
 
 There is built-in support for a random number generator (RNG) associated with each thread stored in thread-local storage. This RNG can be accessed via thread_rng, or used implicitly via random. This RNG is normally randomly seeded from an operating-system source of randomness, e.g. /dev/urandom on Unix systems, and will automatically reseed itself from this source after generating 32 KiB of random data.
@@ -50,13 +63,55 @@ let mut rng = rand::ChaChaRng::new_unseeded();
 println!("i32: {}, u32: {}", rng.gen::<i32>(), rng.gen::<u32>())
 ```
 
+## Features
+
+By default, `rand` is built with all stable features available. The following
+optional features are available:
+
+-   `i128_support` enables support for generating `u128` and `i128` values
+-   `nightly` enables all unstable features (`i128_support`)
+-   `std` enabled by default; by setting "default-features = false" `no_std`
+    mode is activated; this removes features depending on `std` functionality:
+
+    -   `OsRng` is entirely unavailable
+    -   `JitterRng` code is still present, but a nanosecond timer must be
+        provided via `JitterRng::new_with_timer`
+    -   Since no external entropy is available, it is not possible to create
+        generators with fresh seeds (user must provide entropy)
+    -   `thread_rng`, `weak_rng` and `random` are all disabled
+    -   exponential, normal and gamma type distributions are unavailable
+        since `exp` and `log` functions are not provided in `core`
+    -   any code requiring `Vec` or `Box`
+-   `alloc` can be used instead of `std` to provide `Vec` and `Box`
+
+## Testing
+
+Unfortunately, `cargo test` does not test everything. The following tests are
+recommended:
+
+```
+# Basic tests for rand and sub-crates
+cargo test --all
+
+# Test no_std support (build only since nearly all tests require std)
+cargo build --all --no-default-features
+
+# Test 128-bit support (requires nightly)
+cargo test --all --features nightly
+
+# Benchmarks (requires nightly)
+cargo bench
+# or just to test the benchmark code:
+cargo test --benches
+```
+
 # `derive(Rand)`
 
 You can derive the `Rand` trait for your custom type via the `#[derive(Rand)]`
 directive. To use this first add this to your Cargo.toml:
 
 ```toml
-rand = "0.3"
+rand = "0.4"
 rand_derive = "0.3"
 ```
 
diff --git a/appveyor.yml b/appveyor.yml
index 39c6a180..02e217fe 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -32,6 +32,7 @@ install:
 build: false
 
 test_script:
+  - cargo test --benches
   - cargo test
   - cargo test --features nightly
   - cargo test --manifest-path rand-derive/Cargo.toml
diff --git a/benches/bench.rs b/benches/bench.rs
index 5fa92bdb..d396f25b 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -9,52 +9,7 @@ mod distributions;
 
 use std::mem::size_of;
 use test::{black_box, Bencher};
-use rand::{XorShiftRng, StdRng, IsaacRng, Isaac64Rng, Rng};
-use rand::{OsRng, sample, weak_rng};
-
-#[bench]
-fn rand_xorshift(b: &mut Bencher) {
-    let mut rng: XorShiftRng = OsRng::new().unwrap().gen();
-    b.iter(|| {
-        for _ in 0..RAND_BENCH_N {
-            black_box(rng.gen::<usize>());
-        }
-    });
-    b.bytes = size_of::<usize>() as u64 * RAND_BENCH_N;
-}
-
-#[bench]
-fn rand_isaac(b: &mut Bencher) {
-    let mut rng: IsaacRng = OsRng::new().unwrap().gen();
-    b.iter(|| {
-        for _ in 0..RAND_BENCH_N {
-            black_box(rng.gen::<usize>());
-        }
-    });
-    b.bytes = size_of::<usize>() as u64 * RAND_BENCH_N;
-}
-
-#[bench]
-fn rand_isaac64(b: &mut Bencher) {
-    let mut rng: Isaac64Rng = OsRng::new().unwrap().gen();
-    b.iter(|| {
-        for _ in 0..RAND_BENCH_N {
-            black_box(rng.gen::<usize>());
-        }
-    });
-    b.bytes = size_of::<usize>() as u64 * RAND_BENCH_N;
-}
-
-#[bench]
-fn rand_std(b: &mut Bencher) {
-    let mut rng = StdRng::new().unwrap();
-    b.iter(|| {
-        for _ in 0..RAND_BENCH_N {
-            black_box(rng.gen::<usize>());
-        }
-    });
-    b.bytes = size_of::<usize>() as u64 * RAND_BENCH_N;
-}
+use rand::{StdRng, Rng};
 
 #[bench]
 fn rand_f32(b: &mut Bencher) {
@@ -77,21 +32,3 @@ fn rand_f64(b: &mut Bencher) {
     });
     b.bytes = size_of::<f64>() as u64 * RAND_BENCH_N;
 }
-
-#[bench]
-fn rand_shuffle_100(b: &mut Bencher) {
-    let mut rng = weak_rng();
-    let x : &mut [usize] = &mut [1; 100];
-    b.iter(|| {
-        rng.shuffle(x);
-    })
-}
-
-#[bench]
-fn rand_sample_10_of_100(b: &mut Bencher) {
-    let mut rng = weak_rng();
-    let x : &[usize] = &[1; 100];
-    b.iter(|| {
-        sample(&mut rng, x, 10);
-    })
-}
diff --git a/benches/generators.rs b/benches/generators.rs
new file mode 100644
index 00000000..daee7c5f
--- /dev/null
+++ b/benches/generators.rs
@@ -0,0 +1,133 @@
+#![feature(test)]
+
+extern crate test;
+extern crate rand;
+
+const RAND_BENCH_N: u64 = 1000;
+const BYTES_LEN: usize = 1024;
+
+use std::mem::size_of;
+use test::{black_box, Bencher};
+
+use rand::{Rng, StdRng, OsRng, JitterRng};
+use rand::{XorShiftRng, IsaacRng, Isaac64Rng, ChaChaRng};
+
+macro_rules! gen_bytes {
+    ($fnn:ident, $gen:ident) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng: $gen = OsRng::new().unwrap().gen();
+            let mut buf = [0u8; BYTES_LEN];
+            b.iter(|| {
+                for _ in 0..RAND_BENCH_N {
+                    rng.fill_bytes(&mut buf);
+                    black_box(buf);
+                }
+            });
+            b.bytes = BYTES_LEN as u64 * RAND_BENCH_N;
+        }
+    }
+}
+
+macro_rules! gen_bytes_new {
+    ($fnn:ident, $gen:ident) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng = $gen::new().unwrap();
+            let mut buf = [0u8; BYTES_LEN];
+            b.iter(|| {
+                for _ in 0..RAND_BENCH_N {
+                    rng.fill_bytes(&mut buf);
+                    black_box(buf);
+                }
+            });
+            b.bytes = BYTES_LEN as u64 * RAND_BENCH_N;
+        }
+    }
+}
+
+gen_bytes!(gen_bytes_xorshift, XorShiftRng);
+gen_bytes!(gen_bytes_isaac, IsaacRng);
+gen_bytes!(gen_bytes_isaac64, Isaac64Rng);
+gen_bytes!(gen_bytes_chacha, ChaChaRng);
+gen_bytes_new!(gen_bytes_std, StdRng);
+gen_bytes_new!(gen_bytes_os, OsRng);
+
+
+macro_rules! gen_uint {
+    ($fnn:ident, $ty:ty, $gen:ident) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng: $gen = OsRng::new().unwrap().gen();
+            b.iter(|| {
+                for _ in 0..RAND_BENCH_N {
+                    black_box(rng.gen::<$ty>());
+                }
+            });
+            b.bytes = size_of::<$ty>() as u64 * RAND_BENCH_N;
+        }
+    }
+}
+
+macro_rules! gen_uint_new {
+    ($fnn:ident, $ty:ty, $gen:ident) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng = $gen::new().unwrap();
+            b.iter(|| {
+                for _ in 0..RAND_BENCH_N {
+                    black_box(rng.gen::<$ty>());
+                }
+            });
+            b.bytes = size_of::<$ty>() as u64 * RAND_BENCH_N;
+        }
+    }
+}
+
+gen_uint!(gen_u32_xorshift, u32, XorShiftRng);
+gen_uint!(gen_u32_isaac, u32, IsaacRng);
+gen_uint!(gen_u32_isaac64, u32, Isaac64Rng);
+gen_uint!(gen_u32_chacha, u32, ChaChaRng);
+gen_uint_new!(gen_u32_std, u32, StdRng);
+gen_uint_new!(gen_u32_os, u32, OsRng);
+
+gen_uint!(gen_u64_xorshift, u64, XorShiftRng);
+gen_uint!(gen_u64_isaac, u64, IsaacRng);
+gen_uint!(gen_u64_isaac64, u64, Isaac64Rng);
+gen_uint!(gen_u64_chacha, u64, ChaChaRng);
+gen_uint_new!(gen_u64_std, u64, StdRng);
+gen_uint_new!(gen_u64_os, u64, OsRng);
+
+#[bench]
+fn gen_u64_jitter(b: &mut Bencher) {
+    let mut rng = JitterRng::new().unwrap();
+    b.iter(|| {
+        black_box(rng.gen::<u64>());
+    });
+    b.bytes = size_of::<u64>() as u64;
+}
+
+macro_rules! init_gen {
+    ($fnn:ident, $gen:ident) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng: XorShiftRng = OsRng::new().unwrap().gen();
+            b.iter(|| {
+                let r2: $gen = rng.gen();
+                black_box(r2);
+            });
+        }
+    }
+}
+
+init_gen!(init_xorshift, XorShiftRng);
+init_gen!(init_isaac, IsaacRng);
+init_gen!(init_isaac64, Isaac64Rng);
+init_gen!(init_chacha, ChaChaRng);
+
+#[bench]
+fn init_jitter(b: &mut Bencher) {
+    b.iter(|| {
+        black_box(JitterRng::new().unwrap());
+    });
+}
diff --git a/benches/misc.rs b/benches/misc.rs
new file mode 100644
index 00000000..42517617
--- /dev/null
+++ b/benches/misc.rs
@@ -0,0 +1,62 @@
+#![feature(test)]
+
+extern crate test;
+extern crate rand;
+
+use test::{black_box, Bencher};
+
+use rand::{Rng, weak_rng};
+use rand::seq::*;
+
+#[bench]
+fn misc_shuffle_100(b: &mut Bencher) {
+    let mut rng = weak_rng();
+    let x : &mut [usize] = &mut [1; 100];
+    b.iter(|| {
+        rng.shuffle(x);
+        black_box(&x);
+    })
+}
+
+#[bench]
+fn misc_sample_iter_10_of_100(b: &mut Bencher) {
+    let mut rng = weak_rng();
+    let x : &[usize] = &[1; 100];
+    b.iter(|| {
+        black_box(sample_iter(&mut rng, x, 10).unwrap_or_else(|e| e));
+    })
+}
+
+#[bench]
+fn misc_sample_slice_10_of_100(b: &mut Bencher) {
+    let mut rng = weak_rng();
+    let x : &[usize] = &[1; 100];
+    b.iter(|| {
+        black_box(sample_slice(&mut rng, x, 10));
+    })
+}
+
+#[bench]
+fn misc_sample_slice_ref_10_of_100(b: &mut Bencher) {
+    let mut rng = weak_rng();
+    let x : &[usize] = &[1; 100];
+    b.iter(|| {
+        black_box(sample_slice_ref(&mut rng, x, 10));
+    })
+}
+
+macro_rules! sample_indices {
+    ($name:ident, $amount:expr, $length:expr) => {
+        #[bench]
+        fn $name(b: &mut Bencher) {
+            let mut rng = weak_rng();
+            b.iter(|| {
+                black_box(sample_indices(&mut rng, $length, $amount));
+            })
+        }
+    }
+}
+
+sample_indices!(misc_sample_indices_10_of_1k, 10, 1000);
+sample_indices!(misc_sample_indices_50_of_1k, 50, 1000);
+sample_indices!(misc_sample_indices_100_of_1k, 100, 1000);
diff --git a/rand-derive/Cargo.toml b/rand-derive/Cargo.toml
index c3edaffc..1a2dbe10 100644
--- a/rand-derive/Cargo.toml
+++ b/rand-derive/Cargo.toml
@@ -20,4 +20,4 @@ quote = "0.3"
 syn = "0.11"
 
 [dev-dependencies]
-rand = { path = "..", version = "0.3" }
+rand = { path = "..", version = "0.4" }
diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs
index 07f7a56a..8ff13c69 100644
--- a/src/distributions/mod.rs
+++ b/src/distributions/mod.rs
@@ -17,20 +17,29 @@
 //! internally. The `IndependentSample` trait is for generating values
 //! that do not need to record state.
 
-use std::marker;
+use core::marker;
 
 use {Rng, Rand};
 
 pub use self::range::Range;
+#[cfg(feature="std")]
 pub use self::gamma::{Gamma, ChiSquared, FisherF, StudentT};
+#[cfg(feature="std")]
 pub use self::normal::{Normal, LogNormal};
+#[cfg(feature="std")]
 pub use self::exponential::Exp;
 
 pub mod range;
+#[cfg(feature="std")]
 pub mod gamma;
+#[cfg(feature="std")]
 pub mod normal;
+#[cfg(feature="std")]
 pub mod exponential;
 
+#[cfg(feature="std")]
+mod ziggurat_tables;
+
 /// Types that can be used to create a random instance of `Support`.
 pub trait Sample<Support> {
     /// Generate a random value of `Support`, using `rng` as the
@@ -124,7 +133,7 @@ impl<'a, T: Clone> WeightedChoice<'a, T> {
     ///
     /// Panics if:
     ///
-    /// - `v` is empty
+    /// - `items` is empty
     /// - the total weight is 0
     /// - the total weight is larger than a `u32` can contain.
     pub fn new(items: &'a mut [Weighted<T>]) -> WeightedChoice<'a, T> {
@@ -203,8 +212,6 @@ impl<'a, T: Clone> IndependentSample<T> for WeightedChoice<'a, T> {
     }
 }
 
-mod ziggurat_tables;
-
 /// Sample a random number using the Ziggurat method (specifically the
 /// ZIGNOR variant from Doornik 2005). Most of the arguments are
 /// directly from the paper:
@@ -220,6 +227,7 @@ mod ziggurat_tables;
 
 // the perf improvement (25-50%) is definitely worth the extra code
 // size from force-inlining.
+#[cfg(feature="std")]
 #[inline(always)]
 fn ziggurat<R: Rng, P, Z>(
             rng: &mut R,
@@ -271,8 +279,8 @@ fn ziggurat<R: Rng, P, Z>(
 
 #[cfg(test)]
 mod tests {
-
     use {Rng, Rand};
+    use impls;
     use super::{RandSample, WeightedChoice, Weighted, Sample, IndependentSample};
 
     #[derive(PartialEq, Debug)]
@@ -293,6 +301,10 @@ mod tests {
         fn next_u64(&mut self) -> u64 {
             self.next_u32() as u64
         }
+
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            impls::fill_bytes_via_u32(self, dest)
+        }
     }
 
     #[test]
diff --git a/src/distributions/range.rs b/src/distributions/range.rs
index 0d812d72..ccdcdf1a 100644
--- a/src/distributions/range.rs
+++ b/src/distributions/range.rs
@@ -12,7 +12,7 @@
 
 // this is surprisingly complicated to be both generic & correct
 
-use std::num::Wrapping as w;
+use core::num::Wrapping as w;
 
 use Rng;
 use distributions::{Sample, IndependentSample};
@@ -99,7 +99,7 @@ macro_rules! integer_impl {
             #[inline]
             fn construct_range(low: $ty, high: $ty) -> Range<$ty> {
                 let range = (w(high as $unsigned) - w(low as $unsigned)).0;
-                let unsigned_max: $unsigned = ::std::$unsigned::MAX;
+                let unsigned_max: $unsigned = ::core::$unsigned::MAX;
 
                 // this is the largest number that fits into $unsigned
                 // that `range` divides evenly, so, if we've sampled
@@ -136,11 +136,15 @@ integer_impl! { i8, u8 }
 integer_impl! { i16, u16 }
 integer_impl! { i32, u32 }
 integer_impl! { i64, u64 }
+#[cfg(feature = "i128_support")]
+integer_impl! { i128, u128 }
 integer_impl! { isize, usize }
 integer_impl! { u8, u8 }
 integer_impl! { u16, u16 }
 integer_impl! { u32, u32 }
 integer_impl! { u64, u64 }
+#[cfg(feature = "i128_support")]
+integer_impl! { u128, u128 }
 integer_impl! { usize, usize }
 
 macro_rules! float_impl {
@@ -187,7 +191,7 @@ mod tests {
                 $(
                    let v: &[($ty, $ty)] = &[(0, 10),
                                             (10, 127),
-                                            (::std::$ty::MIN, ::std::$ty::MAX)];
+                                            (::core::$ty::MIN, ::core::$ty::MAX)];
                    for &(low, high) in v.iter() {
                         let mut sampler: Range<$ty> = Range::new(low, high);
                         for _ in 0..1000 {
@@ -200,8 +204,12 @@ mod tests {
                  )*
             }}
         }
+        #[cfg(not(feature = "i128_support"))]
         t!(i8, i16, i32, i64, isize,
-           u8, u16, u32, u64, usize)
+           u8, u16, u32, u64, usize);
+        #[cfg(feature = "i128_support")]
+        t!(i8, i16, i32, i64, i128, isize,
+           u8, u16, u32, u64, u128, usize);
     }
 
     #[test]
diff --git a/src/impls.rs b/src/impls.rs
new file mode 100644
index 00000000..70c6006f
--- /dev/null
+++ b/src/impls.rs
@@ -0,0 +1,170 @@
+// Copyright 2013-2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Helper functions for implementing `Rng` functions.
+//!
+//! For cross-platform reproducibility, these functions all use Little Endian:
+//! least-significant part first. For example, `next_u64_via_u32` takes `u32`
+//! values `x, y`, then outputs `(y << 32) | x`. To implement `next_u32`
+//! from `next_u64` in little-endian order, one should use `next_u64() as u32`.
+//!
+//! Byte-swapping (like the std `to_le` functions) is only needed to convert
+//! to/from byte sequences, and since its purpose is reproducibility,
+//! non-reproducible sources (e.g. `OsRng`) need not bother with it.
+
+// TODO: eventually these should be exported somehow
+#![allow(unused)]
+
+use core::intrinsics::transmute;
+use core::slice;
+use core::cmp::min;
+use core::mem::size_of;
+use Rng;
+
+/// Build a `u64` from two successive `next_u32` outputs, low word first.
+pub fn next_u64_via_u32<R: Rng+?Sized>(rng: &mut R) -> u64 {
+    // Little-endian: the first word drawn becomes the low 32 bits.
+    let low = u64::from(rng.next_u32());
+    let high = u64::from(rng.next_u32());
+    low | (high << 32)
+}
+
+// Fill `$dest` with little-endian output words drawn from `$rng.$next_u()`.
+macro_rules! fill_bytes_via {
+    ($rng:ident, $next_u:ident, $BYTES:expr, $dest:ident) => {{
+        // Consume the destination in `$BYTES`-sized pieces, one word per piece.
+        let mut remaining = $dest;
+        while remaining.len() >= $BYTES {
+            // `{remaining}` moves the borrow so the tail can be reassigned.
+            let (head, tail) = {remaining}.split_at_mut($BYTES);
+            remaining = tail;
+            let word: [u8; $BYTES] = unsafe { transmute($rng.$next_u().to_le()) };
+            head.copy_from_slice(&word);
+        }
+        // A final partial piece takes only the leading bytes of one more word.
+        let tail_len = remaining.len();
+        if tail_len > 0 {
+            let word: [u8; $BYTES] = unsafe { transmute($rng.$next_u().to_le()) };
+            remaining.copy_from_slice(&word[..tail_len]);
+        }
+    }}
+}
+
+/// Implement `fill_bytes` via `next_u32`, little-endian order.
+pub fn fill_bytes_via_u32<R: Rng+?Sized>(rng: &mut R, dest: &mut [u8]) {
+    fill_bytes_via!(rng, next_u32, 4, dest)
+}
+
+/// Implement `fill_bytes` via `next_u64`, little-endian order.
+pub fn fill_bytes_via_u64<R: Rng+?Sized>(rng: &mut R, dest: &mut [u8]) {
+    fill_bytes_via!(rng, next_u64, 8, dest)
+}
+
+// Fill a zeroed `$ty` from `$rng.fill_bytes`, reading it as little-endian.
+macro_rules! impl_uint_from_fill {
+    ($rng:expr, $ty:ty, $N:expr) => ({
+        debug_assert!($N == size_of::<$ty>());
+        let mut int: $ty = 0;
+        unsafe {
+            let bytes = slice::from_raw_parts_mut(&mut int as *mut $ty as *mut u8, $N);
+            $rng.fill_bytes(bytes);
+        }
+        // Bytes are little-endian by contract; no-op here except on big-endian.
+        <$ty>::from_le(int)
+    });
+}
+
+macro_rules! fill_via_chunks {
+    ($src:expr, $dest:expr, $N:expr) => ({
+        // Bytes to copy: limited by both the source words and the space in dest.
+        let chunk_size_u8 = min($src.len() * $N, $dest.len());
+        // Source words touched, rounding up so a partially-copied word counts.
+        let chunk_size = (chunk_size_u8 + $N - 1) / $N;
+        // Convert the words about to be copied to little-endian (in place):
+        for ref mut x in $src[0..chunk_size].iter_mut() {
+            **x = (*x).to_le();
+        }
+        // View the whole source slice as raw bytes; only a prefix is copied.
+        let bytes = unsafe { slice::from_raw_parts($src.as_ptr() as *const u8,
+                                                   $src.len() * $N) };
+        let dest_chunk = &mut $dest[0..chunk_size_u8];
+        dest_chunk.copy_from_slice(&bytes[0..chunk_size_u8]);
+
+        (chunk_size, chunk_size_u8)
+    });
+}
+
+/// Implement `fill_bytes` by reading chunks from the output buffer of a block
+/// based RNG.
+///
+/// The return values are `(consumed_u32, filled_u8)`.
+///
+/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
+/// the length of `dest`.
+/// `consumed_u32` is the number of words consumed from `src`, which is the same
+/// as `filled_u8 / 4` rounded up.
+///
+/// Note that on big-endian systems values in the output buffer `src` are
+/// mutated. `src[0..consumed_u32]` get converted to little-endian before
+/// copying.
+///
+/// # Example
+/// (from `IsaacRng`)
+///
+/// ```rust,ignore
+/// fn fill_bytes(&mut self, dest: &mut [u8]) {
+///     let mut read_len = 0;
+///     while read_len < dest.len() {
+///         if self.index >= self.rsl.len() {
+///             self.isaac();
+///         }
+///
+///         let (consumed_u32, filled_u8) =
+///             impls::fill_via_u32_chunks(&mut self.rsl[self.index..],
+///                                        &mut dest[read_len..]);
+///
+///         self.index += consumed_u32;
+///         read_len += filled_u8;
+///     }
+/// }
+/// ```
+pub fn fill_via_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) {
+    fill_via_chunks!(src, dest, 4)
+}
+
+/// Implement `fill_bytes` by reading chunks from the output buffer of a block
+/// based RNG.
+///
+/// The return values are `(consumed_u64, filled_u8)`.
+/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
+/// the length of `dest`.
+/// `consumed_u64` is the number of words consumed from `src`, which is the same
+/// as `filled_u8 / 8` rounded up.
+///
+/// Note that on big-endian systems values in the output buffer `src` are
+/// mutated. `src[0..consumed_u64]` get converted to little-endian before
+/// copying.
+///
+/// See `fill_via_u32_chunks` for an example.
+pub fn fill_via_u64_chunks(src: &mut [u64], dest: &mut [u8]) -> (usize, usize) {
+    fill_via_chunks!(src, dest, 8)
+}
+
+/// Implement `next_u32` via `fill_bytes`, little-endian order.
+pub fn next_u32_via_fill<R: Rng+?Sized>(rng: &mut R) -> u32 {
+    impl_uint_from_fill!(rng, u32, 4)
+}
+
+/// Implement `next_u64` via `fill_bytes`, little-endian order.
+pub fn next_u64_via_fill<R: Rng+?Sized>(rng: &mut R) -> u64 {
+    impl_uint_from_fill!(rng, u64, 8)
+}
+
+// TODO: implement tests for the above
diff --git a/src/isaac.rs b/src/isaac.rs
deleted file mode 100644
index 429f8a0b..00000000
--- a/src/isaac.rs
+++ /dev/null
@@ -1,635 +0,0 @@
-// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// https://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-//! The ISAAC random number generator.
-
-#![allow(non_camel_case_types)]
-
-use std::slice;
-use std::iter::repeat;
-use std::num::Wrapping as w;
-use std::fmt;
-
-use {Rng, SeedableRng, Rand, w32, w64};
-
-const RAND_SIZE_LEN: usize = 8;
-const RAND_SIZE: u32 = 1 << RAND_SIZE_LEN;
-const RAND_SIZE_USIZE: usize = 1 << RAND_SIZE_LEN;
-
-/// A random number generator that uses the ISAAC algorithm[1].
-///
-/// The ISAAC algorithm is generally accepted as suitable for
-/// cryptographic purposes, but this implementation has not be
-/// verified as such. Prefer a generator like `OsRng` that defers to
-/// the operating system for cases that need high security.
-///
-/// [1]: Bob Jenkins, [*ISAAC: A fast cryptographic random number
-/// generator*](http://www.burtleburtle.net/bob/rand/isaacafa.html)
-#[derive(Copy)]
-pub struct IsaacRng {
-    cnt: u32,
-    rsl: [w32; RAND_SIZE_USIZE],
-    mem: [w32; RAND_SIZE_USIZE],
-    a: w32,
-    b: w32,
-    c: w32,
-}
-
-static EMPTY: IsaacRng = IsaacRng {
-    cnt: 0,
-    rsl: [w(0); RAND_SIZE_USIZE],
-    mem: [w(0); RAND_SIZE_USIZE],
-    a: w(0), b: w(0), c: w(0),
-};
-
-impl IsaacRng {
-
-    /// Create an ISAAC random number generator using the default
-    /// fixed seed.
-    pub fn new_unseeded() -> IsaacRng {
-        let mut rng = EMPTY;
-        rng.init(false);
-        rng
-    }
-
-    /// Initialises `self`. If `use_rsl` is true, then use the current value
-    /// of `rsl` as a seed, otherwise construct one algorithmically (not
-    /// randomly).
-    fn init(&mut self, use_rsl: bool) {
-        let mut a = w(0x9e3779b9);
-        let mut b = a;
-        let mut c = a;
-        let mut d = a;
-        let mut e = a;
-        let mut f = a;
-        let mut g = a;
-        let mut h = a;
-
-        macro_rules! mix {
-            () => {{
-                a=a^(b<<11); d=d+a; b=b+c;
-                b=b^(c>>2);  e=e+b; c=c+d;
-                c=c^(d<<8);  f=f+c; d=d+e;
-                d=d^(e>>16); g=g+d; e=e+f;
-                e=e^(f<<10); h=h+e; f=f+g;
-                f=f^(g>>4);  a=a+f; g=g+h;
-                g=g^(h<<8);  b=b+g; h=h+a;
-                h=h^(a>>9);  c=c+h; a=a+b;
-            }}
-        }
-
-        for _ in 0..4 {
-            mix!();
-        }
-
-        if use_rsl {
-            macro_rules! memloop {
-                ($arr:expr) => {{
-                    for i in (0..RAND_SIZE_USIZE/8).map(|i| i * 8) {
-                        a=a+$arr[i  ]; b=b+$arr[i+1];
-                        c=c+$arr[i+2]; d=d+$arr[i+3];
-                        e=e+$arr[i+4]; f=f+$arr[i+5];
-                        g=g+$arr[i+6]; h=h+$arr[i+7];
-                        mix!();
-                        self.mem[i  ]=a; self.mem[i+1]=b;
-                        self.mem[i+2]=c; self.mem[i+3]=d;
-                        self.mem[i+4]=e; self.mem[i+5]=f;
-                        self.mem[i+6]=g; self.mem[i+7]=h;
-                    }
-                }}
-            }
-
-            memloop!(self.rsl);
-            memloop!(self.mem);
-        } else {
-            for i in (0..RAND_SIZE_USIZE/8).map(|i| i * 8) {
-                mix!();
-                self.mem[i  ]=a; self.mem[i+1]=b;
-                self.mem[i+2]=c; self.mem[i+3]=d;
-                self.mem[i+4]=e; self.mem[i+5]=f;
-                self.mem[i+6]=g; self.mem[i+7]=h;
-            }
-        }
-
-        self.isaac();
-    }
-
-    /// Refills the output buffer (`self.rsl`)
-    #[inline]
-    fn isaac(&mut self) {
-        self.c = self.c + w(1);
-        // abbreviations
-        let mut a = self.a;
-        let mut b = self.b + self.c;
-
-        const MIDPOINT: usize = RAND_SIZE_USIZE / 2;
-
-        macro_rules! ind {
-            ($x:expr) => ( self.mem[($x >> 2usize).0 as usize & (RAND_SIZE_USIZE - 1)] )
-        }
-
-        let r = [(0, MIDPOINT), (MIDPOINT, 0)];
-        for &(mr_offset, m2_offset) in r.iter() {
-
-            macro_rules! rngstepp {
-                ($j:expr, $shift:expr) => {{
-                    let base = $j;
-                    let mix = a << $shift;
-
-                    let x = self.mem[base  + mr_offset];
-                    a = (a ^ mix) + self.mem[base + m2_offset];
-                    let y = ind!(x) + a + b;
-                    self.mem[base + mr_offset] = y;
-
-                    b = ind!(y >> RAND_SIZE_LEN) + x;
-                    self.rsl[base + mr_offset] = b;
-                }}
-            }
-
-            macro_rules! rngstepn {
-                ($j:expr, $shift:expr) => {{
-                    let base = $j;
-                    let mix = a >> $shift;
-
-                    let x = self.mem[base  + mr_offset];
-                    a = (a ^ mix) + self.mem[base + m2_offset];
-                    let y = ind!(x) + a + b;
-                    self.mem[base + mr_offset] = y;
-
-                    b = ind!(y >> RAND_SIZE_LEN) + x;
-                    self.rsl[base + mr_offset] = b;
-                }}
-            }
-
-            for i in (0..MIDPOINT/4).map(|i| i * 4) {
-                rngstepp!(i + 0, 13);
-                rngstepn!(i + 1, 6);
-                rngstepp!(i + 2, 2);
-                rngstepn!(i + 3, 16);
-            }
-        }
-
-        self.a = a;
-        self.b = b;
-        self.cnt = RAND_SIZE;
-    }
-}
-
-// Cannot be derived because [u32; 256] does not implement Clone
-impl Clone for IsaacRng {
-    fn clone(&self) -> IsaacRng {
-        *self
-    }
-}
-
-impl Rng for IsaacRng {
-    #[inline]
-    fn next_u32(&mut self) -> u32 {
-        if self.cnt == 0 {
-            // make some more numbers
-            self.isaac();
-        }
-        self.cnt -= 1;
-
-        // self.cnt is at most RAND_SIZE, but that is before the
-        // subtraction above. We want to index without bounds
-        // checking, but this could lead to incorrect code if someone
-        // misrefactors, so we check, sometimes.
-        //
-        // (Changes here should be reflected in Isaac64Rng.next_u64.)
-        debug_assert!(self.cnt < RAND_SIZE);
-
-        // (the % is cheaply telling the optimiser that we're always
-        // in bounds, without unsafe. NB. this is a power of two, so
-        // it optimises to a bitwise mask).
-        self.rsl[(self.cnt % RAND_SIZE) as usize].0
-    }
-}
-
-impl<'a> SeedableRng<&'a [u32]> for IsaacRng {
-    fn reseed(&mut self, seed: &'a [u32]) {
-        // make the seed into [seed[0], seed[1], ..., seed[seed.len()
-        // - 1], 0, 0, ...], to fill rng.rsl.
-        let seed_iter = seed.iter().map(|&x| x).chain(repeat(0u32));
-
-        for (rsl_elem, seed_elem) in self.rsl.iter_mut().zip(seed_iter) {
-            *rsl_elem = w(seed_elem);
-        }
-        self.cnt = 0;
-        self.a = w(0);
-        self.b = w(0);
-        self.c = w(0);
-
-        self.init(true);
-    }
-
-    /// Create an ISAAC random number generator with a seed. This can
-    /// be any length, although the maximum number of elements used is
-    /// 256 and any more will be silently ignored. A generator
-    /// constructed with a given seed will generate the same sequence
-    /// of values as all other generators constructed with that seed.
-    fn from_seed(seed: &'a [u32]) -> IsaacRng {
-        let mut rng = EMPTY;
-        rng.reseed(seed);
-        rng
-    }
-}
-
-impl Rand for IsaacRng {
-    fn rand<R: Rng>(other: &mut R) -> IsaacRng {
-        let mut ret = EMPTY;
-        unsafe {
-            let ptr = ret.rsl.as_mut_ptr() as *mut u8;
-
-            let slice = slice::from_raw_parts_mut(ptr, RAND_SIZE_USIZE * 4);
-            other.fill_bytes(slice);
-        }
-        ret.cnt = 0;
-        ret.a = w(0);
-        ret.b = w(0);
-        ret.c = w(0);
-
-        ret.init(true);
-        return ret;
-    }
-}
-
-impl fmt::Debug for IsaacRng {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "IsaacRng {{}}")
-    }
-}
-
-const RAND_SIZE_64_LEN: usize = 8;
-const RAND_SIZE_64: usize = 1 << RAND_SIZE_64_LEN;
-
-/// A random number generator that uses ISAAC-64[1], the 64-bit
-/// variant of the ISAAC algorithm.
-///
-/// The ISAAC algorithm is generally accepted as suitable for
-/// cryptographic purposes, but this implementation has not be
-/// verified as such. Prefer a generator like `OsRng` that defers to
-/// the operating system for cases that need high security.
-///
-/// [1]: Bob Jenkins, [*ISAAC: A fast cryptographic random number
-/// generator*](http://www.burtleburtle.net/bob/rand/isaacafa.html)
-#[derive(Copy)]
-pub struct Isaac64Rng {
-    cnt: usize,
-    rsl: [w64; RAND_SIZE_64],
-    mem: [w64; RAND_SIZE_64],
-    a: w64,
-    b: w64,
-    c: w64,
-}
-
-static EMPTY_64: Isaac64Rng = Isaac64Rng {
-    cnt: 0,
-    rsl: [w(0); RAND_SIZE_64],
-    mem: [w(0); RAND_SIZE_64],
-    a: w(0), b: w(0), c: w(0),
-};
-
-impl Isaac64Rng {
-    /// Create a 64-bit ISAAC random number generator using the
-    /// default fixed seed.
-    pub fn new_unseeded() -> Isaac64Rng {
-        let mut rng = EMPTY_64;
-        rng.init(false);
-        rng
-    }
-
-    /// Initialises `self`. If `use_rsl` is true, then use the current value
-    /// of `rsl` as a seed, otherwise construct one algorithmically (not
-    /// randomly).
-    fn init(&mut self, use_rsl: bool) {
-        macro_rules! init {
-            ($var:ident) => (
-                let mut $var = w(0x9e3779b97f4a7c13);
-            )
-        }
-        init!(a); init!(b); init!(c); init!(d);
-        init!(e); init!(f); init!(g); init!(h);
-
-        macro_rules! mix {
-            () => {{
-                a=a-e; f=f^(h>>9);  h=h+a;
-                b=b-f; g=g^(a<<9);  a=a+b;
-                c=c-g; h=h^(b>>23); b=b+c;
-                d=d-h; a=a^(c<<15); c=c+d;
-                e=e-a; b=b^(d>>14); d=d+e;
-                f=f-b; c=c^(e<<20); e=e+f;
-                g=g-c; d=d^(f>>17); f=f+g;
-                h=h-d; e=e^(g<<14); g=g+h;
-            }}
-        }
-
-        for _ in 0..4 {
-            mix!();
-        }
-
-        if use_rsl {
-            macro_rules! memloop {
-                ($arr:expr) => {{
-                    for i in (0..RAND_SIZE_64 / 8).map(|i| i * 8) {
-                        a=a+$arr[i  ]; b=b+$arr[i+1];
-                        c=c+$arr[i+2]; d=d+$arr[i+3];
-                        e=e+$arr[i+4]; f=f+$arr[i+5];
-                        g=g+$arr[i+6]; h=h+$arr[i+7];
-                        mix!();
-                        self.mem[i  ]=a; self.mem[i+1]=b;
-                        self.mem[i+2]=c; self.mem[i+3]=d;
-                        self.mem[i+4]=e; self.mem[i+5]=f;
-                        self.mem[i+6]=g; self.mem[i+7]=h;
-                    }
-                }}
-            }
-
-            memloop!(self.rsl);
-            memloop!(self.mem);
-        } else {
-            for i in (0..RAND_SIZE_64 / 8).map(|i| i * 8) {
-                mix!();
-                self.mem[i  ]=a; self.mem[i+1]=b;
-                self.mem[i+2]=c; self.mem[i+3]=d;
-                self.mem[i+4]=e; self.mem[i+5]=f;
-                self.mem[i+6]=g; self.mem[i+7]=h;
-            }
-        }
-
-        self.isaac64();
-    }
-
-    /// Refills the output buffer (`self.rsl`)
-    fn isaac64(&mut self) {
-        self.c = self.c + w(1);
-        // abbreviations
-        let mut a = self.a;
-        let mut b = self.b + self.c;
-        const MIDPOINT: usize =  RAND_SIZE_64 / 2;
-        const MP_VEC: [(usize, usize); 2] = [(0,MIDPOINT), (MIDPOINT, 0)];
-        macro_rules! ind {
-            ($x:expr) => {
-                *self.mem.get_unchecked((($x >> 3usize).0 as usize) & (RAND_SIZE_64 - 1))
-            }
-        }
-
-        for &(mr_offset, m2_offset) in MP_VEC.iter() {
-            for base in (0..MIDPOINT / 4).map(|i| i * 4) {
-
-                macro_rules! rngstepp {
-                    ($j:expr, $shift:expr) => {{
-                        let base = base + $j;
-                        let mix = a ^ (a << $shift);
-                        let mix = if $j == 0 {!mix} else {mix};
-
-                        unsafe {
-                            let x = *self.mem.get_unchecked(base + mr_offset);
-                            a = mix + *self.mem.get_unchecked(base + m2_offset);
-                            let y = ind!(x) + a + b;
-                            *self.mem.get_unchecked_mut(base + mr_offset) = y;
-
-                            b = ind!(y >> RAND_SIZE_64_LEN) + x;
-                            *self.rsl.get_unchecked_mut(base + mr_offset) = b;
-                        }
-                    }}
-                }
-
-                macro_rules! rngstepn {
-                    ($j:expr, $shift:expr) => {{
-                        let base = base + $j;
-                        let mix = a ^ (a >> $shift);
-                        let mix = if $j == 0 {!mix} else {mix};
-
-                        unsafe {
-                            let x = *self.mem.get_unchecked(base + mr_offset);
-                            a = mix + *self.mem.get_unchecked(base + m2_offset);
-                            let y = ind!(x) + a + b;
-                            *self.mem.get_unchecked_mut(base + mr_offset) = y;
-
-                            b = ind!(y >> RAND_SIZE_64_LEN) + x;
-                            *self.rsl.get_unchecked_mut(base + mr_offset) = b;
-                        }
-                    }}
-                }
-
-                rngstepp!(0, 21);
-                rngstepn!(1, 5);
-                rngstepp!(2, 12);
-                rngstepn!(3, 33);
-            }
-        }
-
-        self.a = a;
-        self.b = b;
-        self.cnt = RAND_SIZE_64;
-    }
-}
-
-// Cannot be derived because [u32; 256] does not implement Clone
-impl Clone for Isaac64Rng {
-    fn clone(&self) -> Isaac64Rng {
-        *self
-    }
-}
-
-impl Rng for Isaac64Rng {
-    // FIXME #7771: having next_u32 like this should be unnecessary
-    #[inline]
-    fn next_u32(&mut self) -> u32 {
-        self.next_u64() as u32
-    }
-
-    #[inline]
-    fn next_u64(&mut self) -> u64 {
-        if self.cnt == 0 {
-            // make some more numbers
-            self.isaac64();
-        }
-        self.cnt -= 1;
-
-        // See corresponding location in IsaacRng.next_u32 for
-        // explanation.
-        debug_assert!(self.cnt < RAND_SIZE_64);
-        self.rsl[(self.cnt % RAND_SIZE_64) as usize].0
-    }
-}
-
-impl<'a> SeedableRng<&'a [u64]> for Isaac64Rng {
-    fn reseed(&mut self, seed: &'a [u64]) {
-        // make the seed into [seed[0], seed[1], ..., seed[seed.len()
-        // - 1], 0, 0, ...], to fill rng.rsl.
-        let seed_iter = seed.iter().map(|&x| x).chain(repeat(0u64));
-
-        for (rsl_elem, seed_elem) in self.rsl.iter_mut().zip(seed_iter) {
-            *rsl_elem = w(seed_elem);
-        }
-        self.cnt = 0;
-        self.a = w(0);
-        self.b = w(0);
-        self.c = w(0);
-
-        self.init(true);
-    }
-
-    /// Create an ISAAC random number generator with a seed. This can
-    /// be any length, although the maximum number of elements used is
-    /// 256 and any more will be silently ignored. A generator
-    /// constructed with a given seed will generate the same sequence
-    /// of values as all other generators constructed with that seed.
-    fn from_seed(seed: &'a [u64]) -> Isaac64Rng {
-        let mut rng = EMPTY_64;
-        rng.reseed(seed);
-        rng
-    }
-}
-
-impl Rand for Isaac64Rng {
-    fn rand<R: Rng>(other: &mut R) -> Isaac64Rng {
-        let mut ret = EMPTY_64;
-        unsafe {
-            let ptr = ret.rsl.as_mut_ptr() as *mut u8;
-
-            let slice = slice::from_raw_parts_mut(ptr, RAND_SIZE_64 * 8);
-            other.fill_bytes(slice);
-        }
-        ret.cnt = 0;
-        ret.a = w(0);
-        ret.b = w(0);
-        ret.c = w(0);
-
-        ret.init(true);
-        return ret;
-    }
-}
-
-impl fmt::Debug for Isaac64Rng {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "Isaac64Rng {{}}")
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use {Rng, SeedableRng};
-    use super::{IsaacRng, Isaac64Rng};
-
-    #[test]
-    fn test_rng_32_rand_seeded() {
-        let s = ::test::rng().gen_iter::<u32>().take(256).collect::<Vec<u32>>();
-        let mut ra: IsaacRng = SeedableRng::from_seed(&s[..]);
-        let mut rb: IsaacRng = SeedableRng::from_seed(&s[..]);
-        assert!(::test::iter_eq(ra.gen_ascii_chars().take(100),
-                                rb.gen_ascii_chars().take(100)));
-    }
-    #[test]
-    fn test_rng_64_rand_seeded() {
-        let s = ::test::rng().gen_iter::<u64>().take(256).collect::<Vec<u64>>();
-        let mut ra: Isaac64Rng = SeedableRng::from_seed(&s[..]);
-        let mut rb: Isaac64Rng = SeedableRng::from_seed(&s[..]);
-        assert!(::test::iter_eq(ra.gen_ascii_chars().take(100),
-                                rb.gen_ascii_chars().take(100)));
-    }
-
-    #[test]
-    fn test_rng_32_seeded() {
-        let seed: &[_] = &[1, 23, 456, 7890, 12345];
-        let mut ra: IsaacRng = SeedableRng::from_seed(seed);
-        let mut rb: IsaacRng = SeedableRng::from_seed(seed);
-        assert!(::test::iter_eq(ra.gen_ascii_chars().take(100),
-                                rb.gen_ascii_chars().take(100)));
-    }
-    #[test]
-    fn test_rng_64_seeded() {
-        let seed: &[_] = &[1, 23, 456, 7890, 12345];
-        let mut ra: Isaac64Rng = SeedableRng::from_seed(seed);
-        let mut rb: Isaac64Rng = SeedableRng::from_seed(seed);
-        assert!(::test::iter_eq(ra.gen_ascii_chars().take(100),
-                                rb.gen_ascii_chars().take(100)));
-    }
-
-    #[test]
-    fn test_rng_32_reseed() {
-        let s = ::test::rng().gen_iter::<u32>().take(256).collect::<Vec<u32>>();
-        let mut r: IsaacRng = SeedableRng::from_seed(&s[..]);
-        let string1: String = r.gen_ascii_chars().take(100).collect();
-
-        r.reseed(&s[..]);
-
-        let string2: String = r.gen_ascii_chars().take(100).collect();
-        assert_eq!(string1, string2);
-    }
-    #[test]
-    fn test_rng_64_reseed() {
-        let s = ::test::rng().gen_iter::<u64>().take(256).collect::<Vec<u64>>();
-        let mut r: Isaac64Rng = SeedableRng::from_seed(&s[..]);
-        let string1: String = r.gen_ascii_chars().take(100).collect();
-
-        r.reseed(&s[..]);
-
-        let string2: String = r.gen_ascii_chars().take(100).collect();
-        assert_eq!(string1, string2);
-    }
-
-    #[test]
-    fn test_rng_32_true_values() {
-        let seed: &[_] = &[1, 23, 456, 7890, 12345];
-        let mut ra: IsaacRng = SeedableRng::from_seed(seed);
-        // Regression test that isaac is actually using the above vector
-        let v = (0..10).map(|_| ra.next_u32()).collect::<Vec<_>>();
-        assert_eq!(v,
-                   vec!(2558573138, 873787463, 263499565, 2103644246, 3595684709,
-                        4203127393, 264982119, 2765226902, 2737944514, 3900253796));
-
-        let seed: &[_] = &[12345, 67890, 54321, 9876];
-        let mut rb: IsaacRng = SeedableRng::from_seed(seed);
-        // skip forward to the 10000th number
-        for _ in 0..10000 { rb.next_u32(); }
-
-        let v = (0..10).map(|_| rb.next_u32()).collect::<Vec<_>>();
-        assert_eq!(v,
-                   vec!(3676831399, 3183332890, 2834741178, 3854698763, 2717568474,
-                        1576568959, 3507990155, 179069555, 141456972, 2478885421));
-    }
-    #[test]
-    fn test_rng_64_true_values() {
-        let seed: &[_] = &[1, 23, 456, 7890, 12345];
-        let mut ra: Isaac64Rng = SeedableRng::from_seed(seed);
-        // Regression test that isaac is actually using the above vector
-        let v = (0..10).map(|_| ra.next_u64()).collect::<Vec<_>>();
-        assert_eq!(v,
-                   vec!(547121783600835980, 14377643087320773276, 17351601304698403469,
-                        1238879483818134882, 11952566807690396487, 13970131091560099343,
-                        4469761996653280935, 15552757044682284409, 6860251611068737823,
-                        13722198873481261842));
-
-        let seed: &[_] = &[12345, 67890, 54321, 9876];
-        let mut rb: Isaac64Rng = SeedableRng::from_seed(seed);
-        // skip forward to the 10000th number
-        for _ in 0..10000 { rb.next_u64(); }
-
-        let v = (0..10).map(|_| rb.next_u64()).collect::<Vec<_>>();
-        assert_eq!(v,
-                   vec!(18143823860592706164, 8491801882678285927, 2699425367717515619,
-                        17196852593171130876, 2606123525235546165, 15790932315217671084,
-                        596345674630742204, 9947027391921273664, 11788097613744130851,
-                        10391409374914919106));
-    }
-
-    #[test]
-    fn test_rng_clone() {
-        let seed: &[_] = &[1, 23, 456, 7890, 12345];
-        let mut rng: Isaac64Rng = SeedableRng::from_seed(seed);
-        let mut clone = rng.clone();
-        for _ in 0..16 {
-            assert_eq!(rng.next_u64(), clone.next_u64());
-        }
-    }
-}
diff --git a/src/jitter.rs b/src/jitter.rs
new file mode 100644
index 00000000..f3d94912
--- /dev/null
+++ b/src/jitter.rs
@@ -0,0 +1,739 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//
+// Based on jitterentropy-library, http://www.chronox.de/jent.html.
+// Copyright Stephan Mueller <smueller@chronox.de>, 2014 - 2017.
+//
+// With permission from Stephan Mueller to relicense the Rust translation under
+// the MIT license.
+
+//! Non-physical true random number generator based on timing jitter.
+
+use {Rng, impls};
+
+use core::{fmt, mem, ptr};
+#[cfg(feature="std")]
+use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering};
+
+const MEMORY_BLOCKS: usize = 64;
+const MEMORY_BLOCKSIZE: usize = 32;
+const MEMORY_SIZE: usize = MEMORY_BLOCKS * MEMORY_BLOCKSIZE;
+
+/// A true random number generator based on jitter in the CPU execution time,
+/// and jitter in memory access time.
+///
+/// This is a true random number generator, as opposed to pseudo-random
+/// generators. Random numbers generated by `JitterRng` can be seen as fresh
+/// entropy. A consequence is that it is orders of magnitude slower than `OsRng`
+/// and PRNGs (about 10^3 .. 10^6 times slower).
+///
+/// There are very few situations where using this RNG is appropriate. Only very
+/// few applications require true entropy. A normal PRNG can be statistically
+/// indistinguishable, and a cryptographic PRNG should also be as impossible to
+/// predict.
+///
+/// Use of `JitterRng` is recommended for initializing cryptographic PRNGs when
+/// `OsRng` is not available.
+///
+/// This implementation is based on
+/// [Jitterentropy](http://www.chronox.de/jent.html) version 2.1.0.
+//
+// Note: the C implementation relies on being compiled without optimizations.
+// This implementation goes to great lengths to stop the compiler optimising
+// out what is technically dead code, but which does influence timing jitter.
+pub struct JitterRng {
+    data: u64, // Entropy pool: the current random number, updated by the LFSR
+    // Number of rounds to run the entropy collector per 64 bits
+    rounds: u32,
+    // Timer and previous time stamp, used by `measure_jitter`
+    timer: fn() -> u64,
+    prev_time: u64,
+    // Deltas used for the stuck test
+    last_delta: i64,
+    last_delta2: i64,
+    // Memory for the Memory Access noise source
+    mem_prev_index: usize,
+    mem: [u8; MEMORY_SIZE],
+    // Make `next_u32` not waste 32 bits
+    data_remaining: Option<u32>,
+}
+
+// Hand-written `Debug`: prints only `JitterRng {}`, never the internal state
+impl fmt::Debug for JitterRng {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "JitterRng {{}}")
+    }
+}
+
+/// An error that can occur when `test_timer` fails.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum TimerError {
+    /// No timer available.
+    NoTimer,
+    /// Timer too coarse to use as an entropy source.
+    CoarseTimer,
+    /// Timer is not monotonically increasing.
+    NotMonotonic,
+    /// Variations of deltas of time too small (sic: the name misspells "Variations").
+    TinyVariantions,
+    /// Too many stuck results (indicating no added entropy).
+    TooManyStuck,
+    #[doc(hidden)]
+    __Nonexhaustive, // hidden placeholder; presumably keeps the enum extensible
+}
+
+impl TimerError {
+    fn description(&self) -> &'static str { // fixed message for each variant
+        match *self {
+            TimerError::NoTimer => "no timer available",
+            TimerError::CoarseTimer => "coarse timer",
+            TimerError::NotMonotonic => "timer not monotonic",
+            TimerError::TinyVariantions => "time delta variations too small",
+            TimerError::TooManyStuck => "too many stuck results",
+            TimerError::__Nonexhaustive => unreachable!(), // placeholder; panics if matched
+        }
+    }
+}
+
+impl fmt::Display for TimerError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.description()) // same text as `description()`
+    }
+}
+
+#[cfg(feature="std")]
+impl ::std::error::Error for TimerError {
+    fn description(&self) -> &str {
+        self.description() // resolves to the inherent method, so no recursion
+    }
+}
+
+// Cached number of rounds found by `test_timer`; zero means "not measured yet"
+#[cfg(feature="std")]
+static JITTER_ROUNDS: AtomicUsize = ATOMIC_USIZE_INIT;
+
+impl JitterRng {
+    /// Create a new `JitterRng`.
+    /// Makes use of `std::time` for a timer.
+    ///
+    /// During initialization CPU execution timing jitter is measured a few
+    /// hundred times. If this does not pass basic quality tests, an error is
+    /// returned. The resulting round count is cached to make later calls faster.
+    #[cfg(feature="std")]
+    pub fn new() -> Result<JitterRng, TimerError> {
+        let mut ec = JitterRng::new_with_timer(platform::get_nstime);
+        let mut rounds = JITTER_ROUNDS.load(Ordering::Relaxed) as u32;
+        if rounds == 0 {
+            // No cached result yet: run the timer test.
+            // Concurrent callers may each run the test before the store; harmless.
+            rounds = ec.test_timer()?;
+            JITTER_ROUNDS.store(rounds as usize, Ordering::Relaxed);
+        }
+        ec.set_rounds(rounds);
+        Ok(ec)
+    }
+
+    /// Create a new `JitterRng`.
+    /// A custom timer can be supplied, making it possible to use `JitterRng` in
+    /// `no_std` environments.
+    ///
+    /// The timer must have nanosecond precision.
+    ///
+    /// This method is more low-level than `new()`. It is the responsibility of
+    /// the caller to run `test_timer` before using any numbers generated with
+    /// `JitterRng`, and optionally call `set_rounds()`.
+    pub fn new_with_timer(timer: fn() -> u64) -> JitterRng {
+        let mut ec = JitterRng {
+            data: 0,
+            rounds: 64,
+            timer: timer,
+            prev_time: 0,
+            last_delta: 0,
+            last_delta2: 0,
+            mem_prev_index: 0,
+            mem: [0; MEMORY_SIZE],
+            data_remaining: None,
+        };
+
+        // Fill `data`, `prev_time`, `last_delta` and `last_delta2` with
+        // non-zero values.
+        ec.prev_time = timer();
+        ec.gen_entropy();
+
+        // Do a single read from `self.mem` to make sure the Memory Access noise
+        // source is not optimised out.
+        // Note: this read is important; it affects optimisations for the entire
+        // module!
+        black_box(ec.mem[0]);
+
+        ec
+    }
+
+    /// Configures how many rounds are used to generate each 64-bit value.
+    /// This must be greater than zero (the method panics otherwise), and has a
+    /// big impact on performance and output quality.
+    ///
+    /// `new_with_timer` conservatively uses 64 rounds, but often fewer rounds
+    /// can be used. The `test_timer()` function returns the minimum number of
+    /// rounds required for full strength (platform dependent), so one may use
+    /// `rng.set_rounds(rng.test_timer()?);` or cache the value.
+    pub fn set_rounds(&mut self, rounds: u32) {
+        assert!(rounds > 0);
+        self.rounds = rounds;
+    }
+
+    // Calculate a random loop count used for the next round of an entropy
+    // collection, based on bits from a fresh value from the timer.
+    //
+    // The timer is folded to produce a number that contains at most `n_bits`
+    // bits.
+    //
+    // Note: A constant should be added to the resulting random loop count to
+    // prevent loops that run 0 times.
+    #[inline(never)]
+    fn random_loop_cnt(&mut self, n_bits: u32) -> u32 {
+        let mut rounds = 0;
+
+        let mut time = (self.timer)();
+        // Mix with the current state of the random number to balance the random
+        // loop counter a bit more.
+        time ^= self.data;
+
+        // We fold the time value as much as possible to ensure that as many
+        // bits of the time stamp are included as possible.
+        let folds = (64 + n_bits - 1) / n_bits;
+        let mask = (1 << n_bits) - 1;
+        for _ in 0..folds {
+            rounds ^= time & mask;
+            time = time >> n_bits;
+        }
+
+        rounds as u32
+    }
+
+    // CPU jitter noise source
+    // Noise source based on the CPU execution time jitter
+    //
+    // This function injects the individual bits of the time value into the
+    // entropy pool using an LFSR.
+    //
+    // The code is deliberately inefficient with respect to the bit shifting.
+    // This function not only acts as folding operation, but this function's
+    // execution is used to measure the CPU execution time jitter. Any change to
+    // the loop in this function implies that careful retesting must be done.
+    #[inline(never)]
+    fn lfsr_time(&mut self, time: u64, var_rounds: bool) {
+        fn lfsr(mut data: u64, time: u64) -> u64{
+            for i in 1..65 {
+                let mut tmp = time << (64 - i);
+                tmp = tmp >> (64 - 1);
+
+                // Fibonacci LFSR with polynomial of
+                // x^64 + x^61 + x^56 + x^31 + x^28 + x^23 + 1 which is
+                // primitive according to
+                // http://poincare.matf.bg.ac.rs/~ezivkovm/publications/primpol1.pdf
+                // (the shift values are the polynomial values minus one
+                // due to counting bits from 0 to 63). As the current
+                // position is always the LSB, the polynomial only needs
+                // to shift data in from the left without wrap.
+                data ^= tmp;
+                data ^= (data >> 63) & 1;
+                data ^= (data >> 60) & 1;
+                data ^= (data >> 55) & 1;
+                data ^= (data >> 30) & 1;
+                data ^= (data >> 27) & 1;
+                data ^= (data >> 22) & 1;
+                data = data.rotate_left(1);
+            }
+            data
+        }
+
+        // Note: in the reference implementation only the last round affects
+        // `self.data`, all the other results are ignored. To make sure the
+        // other rounds are not optimised out, we first run all but the last
+        // round on a throw-away value instead of the real `self.data`.
+        let mut lfsr_loop_cnt = 0;
+        if var_rounds { lfsr_loop_cnt = self.random_loop_cnt(4) };
+
+        let mut throw_away: u64 = 0;
+        for _ in 0..lfsr_loop_cnt {
+            throw_away = lfsr(throw_away, time);
+        }
+        black_box(throw_away);
+
+        self.data = lfsr(self.data, time);
+    }
+
+    // Memory Access noise source
+    // This is a noise source based on variations in memory access times
+    //
+    // This function performs memory accesses which will add to the timing
+    // variations due to an unknown amount of CPU wait states that need to be
+    // added when accessing memory. The memory size should be larger than the L1
+    // caches as outlined in the documentation and the associated testing.
+    //
+    // The L1 cache has a very high bandwidth, albeit its access rate is usually
+    // slower than accessing CPU registers. Therefore, L1 accesses only add
+    // minimal variations as the CPU has hardly to wait. Starting with L2,
+    // significant variations are added because L2 typically does not belong to
+    // the CPU any more and therefore a wider range of CPU wait states is
+    // necessary for accesses. L3 and real memory accesses have even a wider
+    // range of wait states. However, to reliably access either L3 or memory,
+    // the `self.mem` memory must be quite large which is usually not desirable.
+    #[inline(never)]
+    fn memaccess(&mut self, var_rounds: bool) {
+        let mut acc_loop_cnt = 128;
+        if var_rounds { acc_loop_cnt += self.random_loop_cnt(4) };
+
+        let mut index = self.mem_prev_index;
+        for _ in 0..acc_loop_cnt {
+            // Addition of memblocksize - 1 to index with wrap around logic to
+            // ensure that every memory location is hit evenly.
+            // The modulus also allows the compiler to remove the indexing
+            // bounds check.
+            index = (index + MEMORY_BLOCKSIZE - 1) % MEMORY_SIZE;
+
+            // memory access: just add 1 to one byte
+            // memory access implies read from and write to memory location
+            let tmp = self.mem[index];
+            self.mem[index] = tmp.wrapping_add(1);
+        }
+        self.mem_prev_index = index;
+    }
+
+
+    // Stuck test by checking the:
+    // - 1st derivative of the jitter measurement (time delta)
+    // - 2nd derivative of the jitter measurement (delta of time deltas)
+    // - 3rd derivative of the jitter measurement (delta of delta of time
+    //   deltas)
+    //
+    // All values must always be non-zero.
+    // This test is a heuristic to see whether the last measurement holds
+    // entropy.
+    fn stuck(&mut self, current_delta: i64) -> bool {
+        let delta2 = self.last_delta - current_delta;
+        let delta3 = delta2 - self.last_delta2;
+
+        self.last_delta = current_delta;
+        self.last_delta2 = delta2;
+
+        current_delta == 0 || delta2 == 0 || delta3 == 0
+    }
+
+    // This is the heart of the entropy generation: calculate time deltas and
+    // use the CPU jitter in the time deltas. The jitter is injected into the
+    // entropy pool.
+    //
+    // Ensure that `self.prev_time` is primed before using the output of this
+    // function. This can be done by calling this function and not using its
+    // result.
+    fn measure_jitter(&mut self) -> Option<()> {
+        // Invoke one noise source before time measurement to add variations
+        self.memaccess(true);
+
+        // Get time stamp and calculate time delta to previous
+        // invocation to measure the timing variations
+        let time = (self.timer)();
+        // Note: wrapping_sub combined with a cast to `i64` generates a correct
+        // delta, even in the unlikely case this is a timer that is not strictly
+        // monotonic.
+        let current_delta = time.wrapping_sub(self.prev_time) as i64;
+        self.prev_time = time;
+
+        // Call the next noise source which also injects the data
+        self.lfsr_time(current_delta as u64, true);
+
+        // Check whether we have a stuck measurement (i.e. does the last
+        // measurement hold entropy?).
+        if self.stuck(current_delta) { return None };
+
+        // Rotate the data buffer by a prime number (any odd number would
+        // do) to ensure that every bit position of the input time stamp
+        // has an even chance of being merged with a bit position in the
+        // entropy pool. We do not use one here as the adjacent bits in
+        // successive time deltas may have some form of dependency. The
+        // chosen value of 7 implies that the low 7 bits of the next
+        // time delta value are concatenated with the current time delta.
+        self.data = self.data.rotate_left(7);
+
+        Some(())
+    }
+
+    // Shuffle the pool a bit by mixing some value with a bijective function
+    // (XOR) into the pool.
+    //
+    // The function generates a mixer value that depends on the bits set and
+    // the location of the set bits in the random number generated by the
+    // entropy source. Therefore, based on the generated random number, this
+    // mixer value can have 2^64 different values. That mixer value is
+    // initialized with the third and fourth SHA-1 constants. After obtaining
+    // the mixer value, it is XORed into the random number.
+    //
+    // The mixer value is not assumed to contain any entropy. But due to the
+    // XOR operation, it can also not destroy any entropy present in the
+    // entropy pool.
+    #[inline(never)]
+    fn stir_pool(&mut self) {
+        // This constant is derived from the first two 32 bit initialization
+        // vectors of SHA-1 as defined in FIPS 180-4 section 5.3.1
+        // The order does not really matter as we do not rely on the specific
+        // numbers. We just pick the SHA-1 constants as they have a good mix of
+        // bit set and unset.
+        const CONSTANT: u64 = 0x67452301efcdab89;
+
+        // The start value of the mixer variable is derived from the third
+        // and fourth 32 bit initialization vector of SHA-1 as defined in
+        // FIPS 180-4 section 5.3.1
+        let mut mixer = 0x98badcfe10325476;
+
+        // This is a constant time function to prevent leaking timing
+        // information about the random number.
+        // The normal code is:
+        // ```
+        // for i in 0..64 {
+        //     if ((self.data >> i) & 1) == 1 { mixer ^= CONSTANT; }
+        // }
+        // ```
+        // This is a bit fragile, as LLVM really wants to use branches here, and
+        // we rely on it to not recognise the opportunity.
+        for i in 0..64 {
+            let apply = (self.data >> i) & 1;
+            let mask = !apply.wrapping_sub(1);
+            mixer ^= CONSTANT & mask;
+            mixer = mixer.rotate_left(1);
+        }
+
+        self.data ^= mixer;
+    }
+
+    fn gen_entropy(&mut self) -> u64 {
+        // Prime `self.prev_time`, and run the noise sources to make sure the
+        // first loop round collects the expected entropy.
+        let _ = self.measure_jitter();
+
+        for _ in 0..self.rounds {
+            // If a stuck measurement is received, repeat measurement
+            // Note: we do not guard against an infinite loop, that would mean
+            // the timer suddenly became broken.
+            while self.measure_jitter().is_none() {}
+        }
+
+        self.stir_pool();
+        self.data
+    }
+
+    /// Basic quality tests on the timer, by measuring CPU timing jitter a few
+    /// hundred times.
+    ///
+    /// If successful, this will return the estimated number of rounds necessary
+    /// to collect 64 bits of entropy. Otherwise a `TimerError` with the cause
+    /// of the failure will be returned.
+    pub fn test_timer(&mut self) -> Result<u32, TimerError> {
+        // We could add a check for system capabilities such as `clock_getres`
+        // or check for `CONFIG_X86_TSC`, but it does not make much sense as the
+        // following sanity checks verify that we have a high-resolution timer.
+
+        #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))]
+        return Err(TimerError::NoTimer);
+
+        let mut delta_sum = 0;
+        let mut old_delta = 0;
+
+        let mut time_backwards = 0;
+        let mut count_mod = 0;
+        let mut count_stuck = 0;
+
+        // TESTLOOPCOUNT needs some loops to identify edge systems.
+        // 100 is definitely too little.
+        const TESTLOOPCOUNT: u64 = 300;
+        const CLEARCACHE: u64 = 100;
+
+        for i in 0..(CLEARCACHE + TESTLOOPCOUNT) {
+            // Measure time delta of core entropy collection logic
+            let time = (self.timer)();
+            self.memaccess(true);
+            self.lfsr_time(time, true);
+            let time2 = (self.timer)();
+
+            // Test whether timer works
+            if time == 0 || time2 == 0 {
+                return Err(TimerError::NoTimer);
+            }
+            let delta = time2.wrapping_sub(time) as i64;
+
+            // Test whether timer is fine grained enough to provide delta even
+            // when called shortly after each other -- this implies that we also
+            // have a high resolution timer
+            if delta == 0 {
+                return Err(TimerError::CoarseTimer);
+            }
+
+            // Up to here we did not modify any variable that will be
+            // evaluated later, but we already performed some work. Thus we
+            // already have had an impact on the caches, branch prediction,
+            // etc. with the goal to clear it to get the worst case
+            // measurements.
+            if i < CLEARCACHE { continue; }
+
+            if self.stuck(delta) { count_stuck += 1; }
+
+            // Test whether we have an increasing timer.
+            if !(time2 > time) { time_backwards += 1; }
+
+            // Count the number of times the counter increases in steps of 100ns
+            // or greater.
+            if (delta % 100) == 0 { count_mod += 1; }
+
+            // Ensure that we have a varying delta timer which is necessary for
+            // the calculation of entropy -- perform this check only after the
+            // first loop is executed as we need to prime the old_delta value
+            delta_sum += (delta - old_delta).abs() as u64;
+            old_delta = delta;
+        }
+
+        // We allow the time to run backwards for up to three times.
+        // This can happen if the clock is being adjusted by NTP operations.
+        // If such an operation just happens to interfere with our test, it
+        // should not fail. The value of 3 should cover the NTP case being
+        // performed during our test run.
+        if time_backwards > 3 {
+            return Err(TimerError::NotMonotonic);
+        }
+
+        // Test that the available amount of entropy per round does not get too
+        // low. We expect 1 bit of entropy per round as a reasonable minimum
+        // (although less is possible, it means the collector loop has to run
+        // much more often).
+        // `assert!(delta_average >= log2(1))`
+        // `assert!(delta_sum / TESTLOOPCOUNT >= 1)`
+        // `assert!(delta_sum >= TESTLOOPCOUNT)`
+        if delta_sum < TESTLOOPCOUNT {
+            return Err(TimerError::TinyVariantions);
+        }
+
+        // Ensure that we have variations in the time stamp below 100 for at
+        // least 10% of all checks -- on some platforms, the counter increments
+        // in multiples of 100, but not always
+        if count_mod > (TESTLOOPCOUNT * 9 / 10) {
+            return Err(TimerError::CoarseTimer);
+        }
+
+        // If we have more than 90% stuck results, then this Jitter RNG is
+        // likely to not work well.
+        if count_stuck > (TESTLOOPCOUNT * 9 / 10) {
+            return Err(TimerError::TooManyStuck);
+        }
+
+        // Estimate the number of `measure_jitter` rounds necessary for 64 bits
+        // of entropy.
+        //
+        // We don't try very hard to come up with a good estimate of the
+        // available bits of entropy per round here for two reasons:
+        // 1. Simple estimates of the available bits (like Shannon entropy) are
+        //    too optimistic.
+        // 2. Unless we want to waste a lot of time during initialization,
+        //    only a small number of samples are available.
+        //
+        // Therefore we use a very simple and conservative estimate:
+        // `let bits_of_entropy = log2(delta_average) / 2`.
+        //
+        // The number of rounds `measure_jitter` should run to collect 64 bits
+        // of entropy is `64 / bits_of_entropy`.
+        //
+        // To have smaller rounding errors, intermediate values are multiplied
+        // by `FACTOR`. To compensate for `log2` and division rounding down,
+        // add 1.
+        let delta_average = delta_sum / TESTLOOPCOUNT;
+        // println!("delta_average: {}", delta_average);
+
+        const FACTOR: u32  = 3;
+        fn log2(x: u64) -> u32 { 64 - x.leading_zeros() }
+
+        // pow(δ, FACTOR) must be representable; if you have overflow reduce FACTOR
+        Ok(64 * 2 * FACTOR / (log2(delta_average.pow(FACTOR)) + 1))
+    }
+
+    /// Statistical test: return the timer delta of one normal run of the
+    /// `JitterEntropy` entropy collector.
+    ///
+    /// Setting `var_rounds` to `true` will execute the memory access and the
+    /// CPU jitter noise sources a variable amount of times (just like a real
+    /// `JitterEntropy` round).
+    ///
+    /// Setting `var_rounds` to `false` will execute the noise sources the
+    /// minimal number of times. This can be used to measure the minimum amount
+    /// of entropy one round of entropy collector can collect in the worst case.
+    ///
+    /// # Example
+    ///
+    /// Use `timer_stats` to run the [NIST SP 800-90B Entropy Estimation Suite]
+    /// (https://github.com/usnistgov/SP800-90B_EntropyAssessment).
+    ///
+    /// This is the recommended way to test the quality of `JitterRng`. It
+    /// should be run before using the RNG on untested hardware, after changes
+    /// that could affect how the code is optimised, and after major compiler
+    /// changes, like a new LLVM version.
+    ///
+    /// First generate two files `jitter_rng_var.bin` and `jitter_rng_min.bin`.
+    ///
+    /// Execute `python noniid_main.py -v jitter_rng_var.bin 8`, and validate it
+    /// with `restart.py -v jitter_rng_var.bin 8 <min-entropy>`.
+    /// This number is the expected amount of entropy that is at least available
+    /// for each round of the entropy collector. This number should be greater
+    /// than the amount estimated with `64 / test_timer()`.
+    ///
+    /// Execute `python noniid_main.py -v -u 4 jitter_rng_var.bin 4`, and
+    /// validate it with `restart.py -v -u 4 jitter_rng_var.bin 4 <min-entropy>`.
+    /// This number is the expected amount of entropy that is available in the
+    /// last 4 bits of the timer delta after running noise sources. Note that
+    /// a value of 3.70 is the minimum estimated entropy for true randomness.
+    ///
+    /// Execute `python noniid_main.py -v -u 4 jitter_rng_min.bin 4`, and
+    /// validate it with `restart.py -v -u 4 jitter_rng_min.bin 4 <min-entropy>`.
+    /// This number is the expected amount of entropy that is available to the
+    /// entropy collector if both noise sources only run their minimal number of
+    /// times. This measures the absolute worst-case, and gives a lower bound
+    /// for the available entropy.
+    ///
+    /// ```rust,no_run
+    /// use rand::JitterRng;
+    ///
+    /// # use std::error::Error;
+    /// # use std::fs::File;
+    /// # use std::io::Write;
+    /// #
+    /// # fn try_main() -> Result<(), Box<Error>> {
+    /// fn get_nstime() -> u64 {
+    ///     use std::time::{SystemTime, UNIX_EPOCH};
+    ///
+    ///     let dur = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
+    ///     // The correct way to calculate the current time is
+    ///     // `dur.as_secs() * 1_000_000_000 + dur.subsec_nanos() as u64`
+    ///     // But this is faster, and the difference in terms of entropy is
+    ///     // negligible (log2(10^9) == 29.9).
+    ///     dur.as_secs() << 30 | dur.subsec_nanos() as u64
+    /// }
+    ///
+    /// // Do not initialize with `JitterRng::new`, but with `new_with_timer`.
+    /// // 'new' always runs `test_timer`, and can therefore fail to
+    /// // initialize. We want to be able to get the statistics even when the
+    /// // timer test fails.
+    /// let mut rng = JitterRng::new_with_timer(get_nstime);
+    ///
+    /// // 1_000_000 results are required for the NIST SP 800-90B Entropy
+    /// // Estimation Suite
+    /// // FIXME: this number is smaller here, otherwise the Doc-test is too slow
+    /// const ROUNDS: usize = 10_000;
+    /// let mut deltas_variable: Vec<u8> = Vec::with_capacity(ROUNDS);
+    /// let mut deltas_minimal: Vec<u8> = Vec::with_capacity(ROUNDS);
+    ///
+    /// for _ in 0..ROUNDS {
+    ///     deltas_variable.push(rng.timer_stats(true) as u8);
+    ///     deltas_minimal.push(rng.timer_stats(false) as u8);
+    /// }
+    ///
+    /// // Write out after the statistics collection loop, to not disturb the
+    /// // test results.
+    /// File::create("jitter_rng_var.bin")?.write_all(&deltas_variable)?;
+    /// File::create("jitter_rng_min.bin")?.write_all(&deltas_minimal)?;
+    /// #
+    /// # Ok(())
+    /// # }
+    /// #
+    /// # fn main() {
+    /// #     try_main().unwrap();
+    /// # }
+    /// ```
+    #[cfg(feature="std")]
+    pub fn timer_stats(&mut self, var_rounds: bool) -> i64 {
+        let time = platform::get_nstime();
+        self.memaccess(var_rounds);
+        self.lfsr_time(time, var_rounds);
+        let time2 = platform::get_nstime();
+        time2.wrapping_sub(time) as i64
+    }
+}
+
+#[cfg(feature="std")]
+mod platform {
+    #[cfg(not(any(target_os = "macos", target_os = "ios", target_os = "windows", all(target_arch = "wasm32", not(target_os = "emscripten")))))]
+    pub fn get_nstime() -> u64 {
+        use std::time::{SystemTime, UNIX_EPOCH};
+
+        let dur = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
+        // The correct way to calculate the current time is
+        // `dur.as_secs() * 1_000_000_000 + dur.subsec_nanos() as u64`
+        // But this is faster, and the difference in terms of entropy is negligible
+        // (log2(10^9) == 29.9).
+        dur.as_secs() << 30 | dur.subsec_nanos() as u64
+    }
+
+    #[cfg(any(target_os = "macos", target_os = "ios"))]
+    pub fn get_nstime() -> u64 {
+        extern crate libc;
+        // On Mac OS and iOS std::time::SystemTime only has 1000ns resolution.
+        // We use `mach_absolute_time` instead. This provides a CPU dependent unit,
+        // to get real nanoseconds the result should be multiplied by numer/denom
+        // from `mach_timebase_info`.
+        // But we are not interested in the exact nanoseconds, just entropy. So we
+        // use the raw result.
+        unsafe { libc::mach_absolute_time() }
+    }
+
+    #[cfg(target_os = "windows")]
+    pub fn get_nstime() -> u64 {
+        extern crate winapi;
+        unsafe {
+            let mut t = super::mem::zeroed();
+            winapi::um::profileapi::QueryPerformanceCounter(&mut t);
+            *t.QuadPart() as u64
+        }
+    }
+
+    #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))]
+    pub fn get_nstime() -> u64 {
+        unreachable!()
+    }
+}
+
+// A function that is opaque to the optimizer to assist in avoiding dead-code
+// elimination. Taken from `bencher`.
+fn black_box<T>(dummy: T) -> T {
+    unsafe {
+        let ret = ptr::read_volatile(&dummy);
+        mem::forget(dummy);
+        ret
+    }
+}
+
+impl Rng for JitterRng {
+    fn next_u32(&mut self) -> u32 {
+        // We want to use both parts of the generated entropy
+        if let Some(high) = self.data_remaining.take() {
+            high
+        } else {
+            let data = self.next_u64();
+            self.data_remaining = Some((data >> 32) as u32);
+            data as u32
+        }
+    }
+
+    fn next_u64(&mut self) -> u64 {
+       self.gen_entropy()
+    }
+
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        impls::fill_bytes_via_u64(self, dest)
+    }
+}
+
+// There are no tests included because (1) this is an "external" RNG, so output
+// is not reproducible and (2) `test_timer` *will* fail on some platforms.
diff --git a/src/lib.rs b/src/lib.rs
index d4f60399..ead94078 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,4 @@
-// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2013-2017 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // https://rust-lang.org/COPYRIGHT.
 //
@@ -26,7 +26,7 @@
 //!
 //! ```toml
 //! [dependencies]
-//! rand = "0.3"
+//! rand = "0.4"
 //! ```
 //!
 //! and this to your crate root:
@@ -184,7 +184,7 @@
 //! // where the car is. The game host will never open the door with the car.
 //! fn game_host_open<R: Rng>(car: u32, choice: u32, rng: &mut R) -> u32 {
 //!     let choices = free_doors(&[car, choice]);
-//!     rand::sample(rng, choices.into_iter(), 1)[0]
+//!     rand::seq::sample_slice(rng, &choices, 1)[0]
 //! }
 //!
 //! // Returns the door we switch to, given our current choice and
@@ -243,43 +243,60 @@
 
 #![deny(missing_debug_implementations)]
 
-#![cfg_attr(feature = "i128_support", feature(i128_type))]
+#![cfg_attr(not(feature="std"), no_std)]
+#![cfg_attr(all(feature="alloc", not(feature="std")), feature(alloc))]
+#![cfg_attr(feature = "i128_support", feature(i128_type, i128))]
 
-#[cfg(test)] #[macro_use] extern crate log;
+#[cfg(feature="std")] extern crate std as core;
+#[cfg(all(feature = "alloc", not(feature="std")))] extern crate alloc;
 
+use core::marker;
+use core::mem;
+#[cfg(feature="std")] use std::cell::RefCell;
+#[cfg(feature="std")] use std::io;
+#[cfg(feature="std")] use std::rc::Rc;
 
-use std::cell::RefCell;
-use std::marker;
-use std::mem;
-use std::io;
-use std::rc::Rc;
-use std::num::Wrapping as w;
-
-pub use os::OsRng;
+// external rngs
+pub use jitter::JitterRng;
+#[cfg(feature="std")] pub use os::OsRng;
 
+// pseudo rngs
 pub use isaac::{IsaacRng, Isaac64Rng};
 pub use chacha::ChaChaRng;
+pub use prng::XorShiftRng;
 
+// local use declarations
 #[cfg(target_pointer_width = "32")]
-use IsaacRng as IsaacWordRng;
+use prng::IsaacRng as IsaacWordRng;
 #[cfg(target_pointer_width = "64")]
-use Isaac64Rng as IsaacWordRng;
+use prng::Isaac64Rng as IsaacWordRng;
 
 use distributions::{Range, IndependentSample};
 use distributions::range::SampleRange;
 
+// public modules
 pub mod distributions;
-pub mod isaac;
-pub mod chacha;
+mod impls;
+pub mod jitter;
+#[cfg(feature="std")] pub mod os;
+#[cfg(feature="std")] pub mod read;
 pub mod reseeding;
-mod rand_impls;
-pub mod os;
-pub mod read;
+#[cfg(any(feature="std", feature = "alloc"))] pub mod seq;
+
+// These tiny modules are here to avoid API breakage, probably only temporarily
+pub mod chacha {
+    //! The ChaCha random number generator.
+    pub use prng::ChaChaRng;
+}
+pub mod isaac {
+    //! The ISAAC random number generator.
+    pub use prng::{IsaacRng, Isaac64Rng};
+}
+
+// private modules
+mod rand_impls;
+mod prng;
 
-#[allow(bad_style)]
-type w64 = w<u64>;
-#[allow(bad_style)]
-type w32 = w<u32>;
 
 /// A type that can be randomly generated using an `Rng`.
 ///
@@ -303,8 +320,8 @@ type w32 = w<u32>;
 ///
 /// [`Open01`]: struct.Open01.html
 /// [`Closed01`]: struct.Closed01.html
-/// [`Exp1`]: struct.Exp1.html
-/// [`StandardNormal`]: struct.StandardNormal.html
+/// [`Exp1`]: distributions/exponential/struct.Exp1.html
+/// [`StandardNormal`]: distributions/normal/struct.StandardNormal.html
 ///
 /// The following aggregate types also implement `Rand` as long as their
 /// component types implement it:
@@ -313,7 +330,6 @@ type w32 = w<u32>;
 ///   independently, using its own `Rand` implementation.
 /// * `Option<T>`: Returns `None` with probability 0.5; otherwise generates a
 ///   random `T` and returns `Some(T)`.
-
 pub trait Rand : Sized {
     /// Generates a random instance of this type using the specified source of
     /// randomness.
@@ -322,21 +338,28 @@ pub trait Rand : Sized {
 
 /// A random number generator.
 pub trait Rng {
-    /// Return the next random u32.
+    /// Return the next random `u32`.
     ///
-    /// This rarely needs to be called directly, prefer `r.gen()` to
-    /// `r.next_u32()`.
-    // FIXME #rust-lang/rfcs#628: Should be implemented in terms of next_u64
+    /// Implementations of this trait must implement at least one of
+    /// `next_u32`, `next_u64` and `fill_bytes` directly. In the case this
+    /// function is not implemented directly, it can be implemented using
+    /// `self.next_u64() as u32` or via `fill_bytes` (TODO: expose helper
+    /// function).
     fn next_u32(&mut self) -> u32;
 
-    /// Return the next random u64.
+    /// Return the next random `u64`.
     ///
-    /// By default this is implemented in terms of `next_u32`. An
-    /// implementation of this trait must provide at least one of
-    /// these two methods. Similarly to `next_u32`, this rarely needs
-    /// to be called directly, prefer `r.gen()` to `r.next_u64()`.
+    /// Implementations of this trait must implement at least one of
+    /// `next_u32`, `next_u64` and `fill_bytes` directly. In the case this
+    /// function is not implemented directly, the default implementation will
+    /// generate values via `next_u32` in little-endian fashion, or this
+    /// function can be implemented via `fill_bytes` (TODO: expose helper
+    /// function).
+    ///
+    /// Types wrapping an inner RNG must not use the default implementation,
+    /// since the inner RNG's implementation may produce different values.
     fn next_u64(&mut self) -> u64 {
-        ((self.next_u32() as u64) << 32) | (self.next_u32() as u64)
+        impls::next_u64_via_u32(self)
     }
 
     /// Return the next random f32 selected from the half-open
@@ -393,16 +416,20 @@ pub trait Rng {
 
     /// Fill `dest` with random data.
     ///
-    /// This has a default implementation in terms of `next_u64` and
-    /// `next_u32`, but should be overridden by implementations that
-    /// offer a more efficient solution than just calling those
-    /// methods repeatedly.
+    /// Implementations of this trait must implement at least one of
+    /// `next_u32`, `next_u64` and `fill_bytes` directly. In the case this
+    /// function is not implemented directly, the default implementation will
+    /// generate values via `next_u64` in little-endian fashion.
+    /// (TODO: expose helper function to allow implementation via `next_u32`.)
     ///
-    /// This method does *not* have a requirement to bear any fixed
-    /// relationship to the other methods, for example, it does *not*
-    /// have to result in the same output as progressively filling
-    /// `dest` with `self.gen::<u8>()`, and any such behaviour should
-    /// not be relied upon.
+    /// There is no requirement on how this method generates values relative to
+    /// `next_u32` or `next_u64`; e.g. a `u64` cast to bytes is not required to
+    /// have the same value as eight bytes filled via this function. There *is*
+    /// a requirement of portability for reproducible generators which implies
+    /// that any seedable generator must fix endianness when generating bytes.
+    ///
+    /// Types wrapping an inner RNG must not use the default implementation,
+    /// since the inner RNG's implementation may produce different values.
     ///
     /// This method should guarantee that `dest` is entirely filled
     /// with new data, and may panic if this is impossible
@@ -419,27 +446,7 @@ pub trait Rng {
     /// println!("{:?}", &v[..]);
     /// ```
     fn fill_bytes(&mut self, dest: &mut [u8]) {
-        // this could, in theory, be done by transmuting dest to a
-        // [u64], but this is (1) likely to be undefined behaviour for
-        // LLVM, (2) has to be very careful about alignment concerns,
-        // (3) adds more `unsafe` that needs to be checked, (4)
-        // probably doesn't give much performance gain if
-        // optimisations are on.
-        let mut count = 0;
-        let mut num = 0;
-        for byte in dest.iter_mut() {
-            if count == 0 {
-                // we could micro-optimise here by generating a u32 if
-                // we only need a few more bytes to fill the vector
-                // (i.e. at most 4).
-                num = self.next_u64();
-                count = 8;
-            }
-
-            *byte = (num & 0xff) as u8;
-            num >>= 8;
-            count -= 1;
-        }
+        impls::fill_bytes_via_u64(self, dest)
     }
 
     /// Return a random value of a `Rand` type.
@@ -617,6 +624,7 @@ impl<'a, R: ?Sized> Rng for &'a mut R where R: Rng {
     }
 }
 
+#[cfg(feature="std")]
 impl<R: ?Sized> Rng for Box<R> where R: Rng {
     fn next_u32(&mut self) -> u32 {
         (**self).next_u32()
@@ -714,93 +722,6 @@ pub trait SeedableRng<Seed>: Rng {
     fn from_seed(seed: Seed) -> Self;
 }
 
-/// An Xorshift[1] random number
-/// generator.
-///
-/// The Xorshift algorithm is not suitable for cryptographic purposes
-/// but is very fast. If you do not know for sure that it fits your
-/// requirements, use a more secure one such as `IsaacRng` or `OsRng`.
-///
-/// [1]: Marsaglia, George (July 2003). ["Xorshift
-/// RNGs"](https://www.jstatsoft.org/v08/i14/paper). *Journal of
-/// Statistical Software*. Vol. 8 (Issue 14).
-#[allow(missing_copy_implementations)]
-#[derive(Clone, Debug)]
-pub struct XorShiftRng {
-    x: w32,
-    y: w32,
-    z: w32,
-    w: w32,
-}
-
-impl XorShiftRng {
-    /// Creates a new XorShiftRng instance which is not seeded.
-    ///
-    /// The initial values of this RNG are constants, so all generators created
-    /// by this function will yield the same stream of random numbers. It is
-    /// highly recommended that this is created through `SeedableRng` instead of
-    /// this function
-    pub fn new_unseeded() -> XorShiftRng {
-        XorShiftRng {
-            x: w(0x193a6754),
-            y: w(0xa8a7d469),
-            z: w(0x97830e05),
-            w: w(0x113ba7bb),
-        }
-    }
-}
-
-impl Rng for XorShiftRng {
-    #[inline]
-    fn next_u32(&mut self) -> u32 {
-        let x = self.x;
-        let t = x ^ (x << 11);
-        self.x = self.y;
-        self.y = self.z;
-        self.z = self.w;
-        let w_ = self.w;
-        self.w = w_ ^ (w_ >> 19) ^ (t ^ (t >> 8));
-        self.w.0
-    }
-}
-
-impl SeedableRng<[u32; 4]> for XorShiftRng {
-    /// Reseed an XorShiftRng. This will panic if `seed` is entirely 0.
-    fn reseed(&mut self, seed: [u32; 4]) {
-        assert!(!seed.iter().all(|&x| x == 0),
-                "XorShiftRng.reseed called with an all zero seed.");
-
-        self.x = w(seed[0]);
-        self.y = w(seed[1]);
-        self.z = w(seed[2]);
-        self.w = w(seed[3]);
-    }
-
-    /// Create a new XorShiftRng. This will panic if `seed` is entirely 0.
-    fn from_seed(seed: [u32; 4]) -> XorShiftRng {
-        assert!(!seed.iter().all(|&x| x == 0),
-                "XorShiftRng::from_seed called with an all zero seed.");
-
-        XorShiftRng {
-            x: w(seed[0]),
-            y: w(seed[1]),
-            z: w(seed[2]),
-            w: w(seed[3]),
-        }
-    }
-}
-
-impl Rand for XorShiftRng {
-    fn rand<R: Rng>(rng: &mut R) -> XorShiftRng {
-        let mut tuple: (u32, u32, u32, u32) = rng.gen();
-        while tuple == (0, 0, 0, 0) {
-            tuple = rng.gen();
-        }
-        let (x, y, z, w_) = tuple;
-        XorShiftRng { x: w(x), y: w(y), z: w(z), w: w(w_) }
-    }
-}
-
 /// A wrapper for generating floating point numbers uniformly in the
 /// open interval `(0,1)` (not including either endpoint).
 ///
@@ -838,7 +759,7 @@ pub struct Closed01<F>(pub F);
 
 /// The standard RNG. This is designed to be efficient on the current
 /// platform.
-#[derive(Copy, Clone, Debug)]
+#[derive(Clone, Debug)]
 pub struct StdRng {
     rng: IsaacWordRng,
 }
@@ -855,8 +776,19 @@ impl StdRng {
     ///
     /// Reading the randomness from the OS may fail, and any error is
     /// propagated via the `io::Result` return value.
+    #[cfg(feature="std")]
     pub fn new() -> io::Result<StdRng> {
-        OsRng::new().map(|mut r| StdRng { rng: r.gen() })
+        match OsRng::new() {
+            Ok(mut r) => Ok(StdRng { rng: r.gen() }),
+            Err(e1) => {
+                match JitterRng::new() {
+                    Ok(mut r) => Ok(StdRng { rng: r.gen() }),
+                    Err(_) => {
+                        Err(e1)
+                    }
+                }
+            }
+        }
     }
 }
 
@@ -870,6 +802,11 @@ impl Rng for StdRng {
     fn next_u64(&mut self) -> u64 {
         self.rng.next_u64()
     }
+
+    #[inline]
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        self.rng.fill_bytes(dest)
+    }
 }
 
 impl<'a> SeedableRng<&'a [usize]> for StdRng {
@@ -891,31 +828,33 @@ impl<'a> SeedableRng<&'a [usize]> for StdRng {
 /// seeded `Rng` for consistency over time you should pick one algorithm and
 /// create the `Rng` yourself.
 ///
-/// This will read randomness from the operating system to seed the
-/// generator.
+/// This will seed the generator with randomness from thread_rng.
+#[cfg(feature="std")]
 pub fn weak_rng() -> XorShiftRng {
-    match OsRng::new() {
-        Ok(mut r) => r.gen(),
-        Err(e) => panic!("weak_rng: failed to create seeded RNG: {:?}", e)
-    }
+    thread_rng().gen()
 }
 
 /// Controls how the thread-local RNG is reseeded.
+#[cfg(feature="std")]
 #[derive(Debug)]
 struct ThreadRngReseeder;
 
+#[cfg(feature="std")]
 impl reseeding::Reseeder<StdRng> for ThreadRngReseeder {
     fn reseed(&mut self, rng: &mut StdRng) {
-        *rng = match StdRng::new() {
-            Ok(r) => r,
-            Err(e) => panic!("could not reseed thread_rng: {}", e)
+        match StdRng::new() {
+            Ok(r) => *rng = r,
+            Err(e) => panic!("No entropy available: {}", e),
         }
     }
 }
+#[cfg(feature="std")]
 const THREAD_RNG_RESEED_THRESHOLD: u64 = 32_768;
+#[cfg(feature="std")]
 type ThreadRngInner = reseeding::ReseedingRng<StdRng, ThreadRngReseeder>;
 
 /// The thread-local RNG.
+#[cfg(feature="std")]
 #[derive(Clone, Debug)]
 pub struct ThreadRng {
     rng: Rc<RefCell<ThreadRngInner>>,
@@ -925,19 +864,21 @@ pub struct ThreadRng {
 /// generator, seeded by the system. Intended to be used in method
 /// chaining style, e.g. `thread_rng().gen::<i32>()`.
 ///
-/// The RNG provided will reseed itself from the operating system
-/// after generating a certain amount of randomness.
+/// After generating a certain amount of randomness, the RNG will reseed itself
+/// from the operating system or, if the operating system RNG returns an error,
+/// from `JitterRng`, an alternative entropy source using the system timer.
 ///
 /// The internal RNG used is platform and architecture dependent, even
 /// if the operating system random number generator is rigged to give
 /// the same sequence always. If absolute consistency is required,
 /// explicitly select an RNG, e.g. `IsaacRng` or `Isaac64Rng`.
+#[cfg(feature="std")]
 pub fn thread_rng() -> ThreadRng {
     // used to make space in TLS for a random number generator
     thread_local!(static THREAD_RNG_KEY: Rc<RefCell<ThreadRngInner>> = {
         let r = match StdRng::new() {
             Ok(r) => r,
-            Err(e) => panic!("could not initialize thread_rng: {}", e)
+            Err(e) => panic!("No entropy available: {}", e),
         };
         let rng = reseeding::ReseedingRng::new(r,
                                                THREAD_RNG_RESEED_THRESHOLD,
@@ -948,6 +889,7 @@ pub fn thread_rng() -> ThreadRng {
     ThreadRng { rng: THREAD_RNG_KEY.with(|t| t.clone()) }
 }
 
+#[cfg(feature="std")]
 impl Rng for ThreadRng {
     fn next_u32(&mut self) -> u32 {
         self.rng.borrow_mut().next_u32()
@@ -997,7 +939,7 @@ impl Rng for ThreadRng {
 ///     *x = rand::random()
 /// }
 ///
-/// // would be faster as
+/// // can be made faster by caching thread_rng
 ///
 /// let mut rng = rand::thread_rng();
 ///
@@ -1005,11 +947,14 @@ impl Rng for ThreadRng {
 ///     *x = rng.gen();
 /// }
 /// ```
+#[cfg(feature="std")]
 #[inline]
 pub fn random<T: Rand>() -> T {
     thread_rng().gen()
 }
 
+/// DEPRECATED: use `seq::sample_iter` instead.
+///
 /// Randomly sample up to `amount` elements from a finite iterator.
 /// The order of elements in the sample is not random.
 ///
@@ -1022,37 +967,35 @@ pub fn random<T: Rand>() -> T {
 /// let sample = sample(&mut rng, 1..100, 5);
 /// println!("{:?}", sample);
 /// ```
+#[cfg(feature="std")]
+#[inline(always)]
+#[deprecated(since="0.4.0", note="renamed to seq::sample_iter")]
 pub fn sample<T, I, R>(rng: &mut R, iterable: I, amount: usize) -> Vec<T>
     where I: IntoIterator<Item=T>,
           R: Rng,
 {
-    let mut iter = iterable.into_iter();
-    let mut reservoir: Vec<T> = iter.by_ref().take(amount).collect();
-    // continue unless the iterator was exhausted
-    if reservoir.len() == amount {
-        for (i, elem) in iter.enumerate() {
-            let k = rng.gen_range(0, i + 1 + amount);
-            if let Some(spot) = reservoir.get_mut(k) {
-                *spot = elem;
-            }
-        }
-    }
-    reservoir
+    // the legacy sample didn't care whether amount was met
+    seq::sample_iter(rng, iterable, amount)
+        .unwrap_or_else(|e| e)
 }
 
 #[cfg(test)]
 mod test {
-    use super::{Rng, thread_rng, random, SeedableRng, StdRng, sample};
+    use impls;
+    use super::{Rng, thread_rng, random, SeedableRng, StdRng, weak_rng};
     use std::iter::repeat;
 
     pub struct MyRng<R> { inner: R }
 
     impl<R: Rng> Rng for MyRng<R> {
         fn next_u32(&mut self) -> u32 {
-            fn next<T: Rng>(t: &mut T) -> u32 {
-                t.next_u32()
-            }
-            next(&mut self.inner)
+            self.inner.next_u32()
+        }
+        fn next_u64(&mut self) -> u64 {
+            self.inner.next_u64()
+        }
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            self.inner.fill_bytes(dest)
         }
     }
 
@@ -1065,7 +1008,9 @@ mod test {
         fn next_u32(&mut self) -> u32 { self.i as u32 }
         fn next_u64(&mut self) -> u64 { self.i }
 
-        // no fill_bytes on purpose
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            impls::fill_bytes_via_u64(self, dest)
+        }
     }
 
     pub fn iter_eq<I, J>(i: I, j: J) -> bool
@@ -1138,14 +1083,6 @@ mod test {
         r.gen_range(5, 2);
     }
 
-    #[test]
-    fn test_gen_f64() {
-        let mut r = thread_rng();
-        let a = r.gen::<f64>();
-        let b = r.gen::<f64>();
-        debug!("{:?}", (a, b));
-    }
-
     #[test]
     fn test_gen_weighted_bool() {
         let mut r = thread_rng();
@@ -1248,24 +1185,6 @@ mod test {
                      (f32, (f64, (f64,)))) = random();
     }
 
-    #[test]
-    fn test_sample() {
-        let min_val = 1;
-        let max_val = 100;
-
-        let mut r = thread_rng();
-        let vals = (min_val..max_val).collect::<Vec<i32>>();
-        let small_sample = sample(&mut r, vals.iter(), 5);
-        let large_sample = sample(&mut r, vals.iter(), vals.len() + 5);
-
-        assert_eq!(small_sample.len(), 5);
-        assert_eq!(large_sample.len(), vals.len());
-
-        assert!(small_sample.iter().all(|e| {
-            **e >= min_val && **e <= max_val
-        }));
-    }
-
     #[test]
     fn test_std_rng_seeded() {
         let s = thread_rng().gen_iter::<usize>().take(256).collect::<Vec<usize>>();
@@ -1286,4 +1205,13 @@ mod test {
         let string2 = r.gen_ascii_chars().take(100).collect::<String>();
         assert_eq!(string1, string2);
     }
+
+    #[test]
+    fn test_weak_rng() {
+        let s = weak_rng().gen_iter::<usize>().take(256).collect::<Vec<usize>>();
+        let mut ra: StdRng = SeedableRng::from_seed(&s[..]);
+        let mut rb: StdRng = SeedableRng::from_seed(&s[..]);
+        assert!(iter_eq(ra.gen_ascii_chars().take(100),
+                        rb.gen_ascii_chars().take(100)));
+    }
 }
diff --git a/src/os.rs b/src/os.rs
index b8437858..3526e85f 100644
--- a/src/os.rs
+++ b/src/os.rs
@@ -53,13 +53,13 @@ impl fmt::Debug for OsRng {
     }
 }
 
-fn next_u32(mut fill_buf: &mut FnMut(&mut [u8])) -> u32 {
+fn next_u32(fill_buf: &mut FnMut(&mut [u8])) -> u32 {
     let mut buf: [u8; 4] = [0; 4];
     fill_buf(&mut buf);
     unsafe { mem::transmute::<[u8; 4], u32>(buf) }
 }
 
-fn next_u64(mut fill_buf: &mut FnMut(&mut [u8])) -> u64 {
+fn next_u64(fill_buf: &mut FnMut(&mut [u8])) -> u64 {
     let mut buf: [u8; 8] = [0; 8];
     fill_buf(&mut buf);
     unsafe { mem::transmute::<[u8; 8], u64>(buf) }
@@ -407,7 +407,7 @@ mod imp {
             next_u64(&mut |v| self.fill_bytes(v))
         }
         fn fill_bytes(&mut self, v: &mut [u8]) {
-            for s in v.chunks_mut(fuchsia_zircon::ZX_CPRNG_DRAW_MAX_LEN) {
+            for s in v.chunks_mut(fuchsia_zircon::sys::ZX_CPRNG_DRAW_MAX_LEN) {
                 let mut filled = 0;
                 while filled < s.len() {
                     match fuchsia_zircon::cprng_draw(&mut s[filled..]) {
@@ -422,19 +422,16 @@ mod imp {
 
 #[cfg(windows)]
 mod imp {
+    extern crate winapi;
+
     use std::io;
     use Rng;
 
     use super::{next_u32, next_u64};
 
-    type BOOLEAN = u8;
-    type ULONG = u32;
-
-    #[link(name = "advapi32")]
-    extern "system" {
-        // This function's real name is `RtlGenRandom`.
-        fn SystemFunction036(RandomBuffer: *mut u8, RandomBufferLength: ULONG) -> BOOLEAN;
-    }
+    use self::winapi::shared::minwindef::ULONG;
+    use self::winapi::um::ntsecapi::RtlGenRandom;
+    use self::winapi::um::winnt::PVOID;
 
     #[derive(Debug)]
     pub struct OsRng;
@@ -457,7 +454,7 @@ mod imp {
             // split up the buffer.
             for slice in v.chunks_mut(<ULONG>::max_value() as usize) {
                 let ret = unsafe {
-                    SystemFunction036(slice.as_mut_ptr(), slice.len() as ULONG)
+                    RtlGenRandom(slice.as_mut_ptr() as PVOID, slice.len() as ULONG)
                 };
                 if ret == 0 {
                     panic!("couldn't generate random bytes: {}",
@@ -544,6 +541,26 @@ mod imp {
     }
 }
 
+#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))]
+mod imp {
+    use std::io;
+    use Rng;
+
+    #[derive(Debug)]
+    pub struct OsRng;
+
+    impl OsRng {
+        pub fn new() -> io::Result<OsRng> {
+            Err(io::Error::new(io::ErrorKind::Other, "Not supported"))
+        }
+    }
+
+    impl Rng for OsRng {
+        fn next_u32(&mut self) -> u32 {
+            panic!("Not supported")
+        }
+    }
+}
 
 #[cfg(test)]
 mod test {
diff --git a/src/chacha.rs b/src/prng/chacha.rs
similarity index 72%
rename from src/chacha.rs
rename to src/prng/chacha.rs
index 7431fb3b..6c4edca0 100644
--- a/src/chacha.rs
+++ b/src/prng/chacha.rs
@@ -10,8 +10,9 @@
 
 //! The ChaCha random number generator.
 
-use std::num::Wrapping as w;
-use {Rng, SeedableRng, Rand, w32};
+use core::fmt;
+use {Rng, SeedableRng, Rand};
+use impls;
 
 const KEY_WORDS    : usize =  8; // 8 words for the 256-bit key
 const STATE_WORDS  : usize = 16;
@@ -26,26 +27,26 @@ const CHACHA_ROUNDS: u32 = 20; // Cryptographically secure from 8 upwards as of
 ///
 /// [1]: D. J. Bernstein, [*ChaCha, a variant of
 /// Salsa20*](https://cr.yp.to/chacha.html)
-#[derive(Copy, Clone, Debug)]
+#[derive(Clone)]
 pub struct ChaChaRng {
-    buffer:  [w32; STATE_WORDS], // Internal buffer of output
-    state:   [w32; STATE_WORDS], // Initial state
-    index:   usize,                 // Index into state
+    buffer:  [u32; STATE_WORDS], // Internal buffer of output
+    state:   [u32; STATE_WORDS], // Initial state
+    index:   usize,              // Index into state
 }
 
-static EMPTY: ChaChaRng = ChaChaRng {
-    buffer:  [w(0); STATE_WORDS],
-    state:   [w(0); STATE_WORDS],
-    index:   STATE_WORDS
-};
-
+// Custom Debug implementation that does not expose the internal state
+impl fmt::Debug for ChaChaRng {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "ChaChaRng {{}}")
+    }
+}
 
 macro_rules! quarter_round{
     ($a: expr, $b: expr, $c: expr, $d: expr) => {{
-        $a = $a + $b; $d = $d ^ $a; $d = w($d.0.rotate_left(16));
-        $c = $c + $d; $b = $b ^ $c; $b = w($b.0.rotate_left(12));
-        $a = $a + $b; $d = $d ^ $a; $d = w($d.0.rotate_left( 8));
-        $c = $c + $d; $b = $b ^ $c; $b = w($b.0.rotate_left( 7));
+        $a = $a.wrapping_add($b); $d ^= $a; $d = $d.rotate_left(16);
+        $c = $c.wrapping_add($d); $b ^= $c; $b = $b.rotate_left(12);
+        $a = $a.wrapping_add($b); $d ^= $a; $d = $d.rotate_left( 8);
+        $c = $c.wrapping_add($d); $b ^= $c; $b = $b.rotate_left( 7);
     }}
 }
 
@@ -65,15 +66,15 @@ macro_rules! double_round{
 }
 
 #[inline]
-fn core(output: &mut [w32; STATE_WORDS], input: &[w32; STATE_WORDS]) {
-    *output = *input;
+fn core(new: &mut [u32; STATE_WORDS], input: &[u32; STATE_WORDS]) {
+    *new = *input;
 
     for _ in 0..CHACHA_ROUNDS / 2 {
-        double_round!(output);
+        double_round!(new);
     }
 
     for i in 0..STATE_WORDS {
-        output[i] = output[i] + input[i];
+        new[i] = new[i].wrapping_add(input[i]);
     }
 }
 
@@ -99,7 +100,11 @@ impl ChaChaRng {
     /// - 2917185654
     /// - 2419978656
     pub fn new_unseeded() -> ChaChaRng {
-        let mut rng = EMPTY;
+        let mut rng = ChaChaRng {
+            buffer:  [0; STATE_WORDS],
+            state:   [0; STATE_WORDS],
+            index:   STATE_WORDS
+        };
         rng.init(&[0; KEY_WORDS]);
         rng
     }
@@ -124,10 +129,10 @@ impl ChaChaRng {
     /// println!("{:?}", ra.next_u32());
     /// ```
     pub fn set_counter(&mut self, counter_low: u64, counter_high: u64) {
-        self.state[12] = w((counter_low >>  0) as u32);
-        self.state[13] = w((counter_low >> 32) as u32);
-        self.state[14] = w((counter_high >>  0) as u32);
-        self.state[15] = w((counter_high >> 32) as u32);
+        self.state[12] = (counter_low >>  0) as u32;
+        self.state[13] = (counter_low >> 32) as u32;
+        self.state[14] = (counter_high >>  0) as u32;
+        self.state[15] = (counter_high >> 32) as u32;
         self.index = STATE_WORDS; // force recomputation
     }
 
@@ -150,19 +155,19 @@ impl ChaChaRng {
     /// [1]: Daniel J. Bernstein. [*Extending the Salsa20
     /// nonce.*](https://cr.yp.to/papers.html#xsalsa)
     fn init(&mut self, key: &[u32; KEY_WORDS]) {
-        self.state[0] = w(0x61707865);
-        self.state[1] = w(0x3320646E);
-        self.state[2] = w(0x79622D32);
-        self.state[3] = w(0x6B206574);
+        self.state[0] = 0x61707865;
+        self.state[1] = 0x3320646E;
+        self.state[2] = 0x79622D32;
+        self.state[3] = 0x6B206574;
 
         for i in 0..KEY_WORDS {
-            self.state[4+i] = w(key[i]);
+            self.state[4+i] = key[i];
         }
 
-        self.state[12] = w(0);
-        self.state[13] = w(0);
-        self.state[14] = w(0);
-        self.state[15] = w(0);
+        self.state[12] = 0;
+        self.state[13] = 0;
+        self.state[14] = 0;
+        self.state[15] = 0;
 
         self.index = STATE_WORDS;
     }
@@ -172,39 +177,46 @@ impl ChaChaRng {
         core(&mut self.buffer, &self.state);
         self.index = 0;
         // update 128-bit counter
-        self.state[12] = self.state[12] + w(1);
-        if self.state[12] != w(0) { return };
-        self.state[13] = self.state[13] + w(1);
-        if self.state[13] != w(0) { return };
-        self.state[14] = self.state[14] + w(1);
-        if self.state[14] != w(0) { return };
-        self.state[15] = self.state[15] + w(1);
+        self.state[12] = self.state[12].wrapping_add(1);
+        if self.state[12] != 0 { return };
+        self.state[13] = self.state[13].wrapping_add(1);
+        if self.state[13] != 0 { return };
+        self.state[14] = self.state[14].wrapping_add(1);
+        if self.state[14] != 0 { return };
+        self.state[15] = self.state[15].wrapping_add(1);
     }
 }
 
 impl Rng for ChaChaRng {
     #[inline]
     fn next_u32(&mut self) -> u32 {
-        if self.index == STATE_WORDS {
+        // Using a local variable for `index`, and checking the size avoids a
+        // bounds check later on.
+        let mut index = self.index as usize;
+        if index >= STATE_WORDS {
             self.update();
+            index = 0;
         }
 
-        let value = self.buffer[self.index % STATE_WORDS];
+        let value = self.buffer[index];
         self.index += 1;
-        value.0
+        value
+    }
+
+    fn next_u64(&mut self) -> u64 {
+        impls::next_u64_via_u32(self)
+    }
+
+
+    fn fill_bytes(&mut self, bytes: &mut [u8]) {
+        impls::fill_bytes_via_u32(self, bytes)
     }
 }
 
 impl<'a> SeedableRng<&'a [u32]> for ChaChaRng {
 
     fn reseed(&mut self, seed: &'a [u32]) {
-        // reset state
-        self.init(&[0u32; KEY_WORDS]);
-        // set key in place
-        let key = &mut self.state[4 .. 4+KEY_WORDS];
-        for (k, s) in key.iter_mut().zip(seed.iter()) {
-            *k = w(*s);
-        }
+        *self = Self::from_seed(seed);
     }
 
     /// Create a ChaCha generator from a seed,
@@ -212,8 +224,19 @@ impl<'a> SeedableRng<&'a [u32]> for ChaChaRng {
     /// Only up to 8 words are used; if less than 8
     /// words are used, the remaining are set to zero.
     fn from_seed(seed: &'a [u32]) -> ChaChaRng {
-        let mut rng = EMPTY;
-        rng.reseed(seed);
+        let mut rng = ChaChaRng {
+            buffer:  [0; STATE_WORDS],
+            state:   [0; STATE_WORDS],
+            index:   STATE_WORDS
+        };
+        rng.init(&[0u32; KEY_WORDS]);
+        // set key in place
+        {
+            let key = &mut rng.state[4 .. 4+KEY_WORDS];
+            for (k, s) in key.iter_mut().zip(seed.iter()) {
+                *k = *s;
+            }
+        }
         rng
     }
 }
@@ -306,6 +329,20 @@ mod test {
                         0x2c5bad8f, 0x898881dc, 0x5f1c86d9, 0xc1f8e7f4));
     }
 
+    #[test]
+    fn test_rng_true_bytes() {
+        let seed : &[_] = &[0u32; 8];
+        let mut ra: ChaChaRng = SeedableRng::from_seed(seed);
+        let mut buf = [0u8; 32];
+        ra.fill_bytes(&mut buf);
+        // Same as first values in test_rng_true_values as bytes in LE order
+        assert_eq!(buf,
+                   [118, 184, 224, 173, 160, 241, 61, 144,
+                    64, 93, 106, 229, 83, 134, 189, 40,
+                    189, 210, 25, 184, 160, 141, 237, 26,
+                    168, 54, 239, 204, 139, 119, 13, 199]);
+    }
+
     #[test]
     fn test_rng_clone() {
         let seed : &[_] = &[0u32; 8];
diff --git a/src/prng/isaac.rs b/src/prng/isaac.rs
new file mode 100644
index 00000000..f694c74b
--- /dev/null
+++ b/src/prng/isaac.rs
@@ -0,0 +1,471 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! The ISAAC random number generator.
+
+use core::slice;
+use core::iter::repeat;
+use core::num::Wrapping as w;
+use core::fmt;
+
+use {Rng, SeedableRng, Rand};
+use impls;
+
+#[allow(non_camel_case_types)]
+type w32 = w<u32>;
+
+const RAND_SIZE_LEN: usize = 8;
+const RAND_SIZE: usize = 1 << RAND_SIZE_LEN;
+
+/// A random number generator that uses the ISAAC algorithm.
+///
+/// ISAAC stands for "Indirection, Shift, Accumulate, Add, and Count" which are
+/// the principal bitwise operations employed. It is the most advanced of a
+/// series of array-based random number generators designed by Robert Jenkins
+/// in 1996[1][2].
+///
+/// Although ISAAC is designed to be cryptographically secure, its design is not
+/// founded in cryptographic theory. Therefore it is _not recommended for_
+/// cryptographic purposes. It is however one of the strongest non-cryptographic
+/// RNGs, while still being reasonably fast.
+///
+/// Where fast random numbers are needed which should still be secure, but where
+/// speed is more important than absolute (cryptographic) security (e.g. to
+/// initialise hashes in the std library), a generator like ISAAC may be a good
+/// choice.
+///
+/// In 2006 an improvement to ISAAC was suggested by Jean-Philippe Aumasson,
+/// named ISAAC+[3]. But because the specification is not complete, there is no
+/// good implementation, and because the suggested bias may not exist, it is not
+/// implemented here.
+///
+/// ## Overview of the ISAAC algorithm:
+/// (in pseudo-code)
+///
+/// ```text
+/// Input: a, b, c, s[256] // state
+/// Output: r[256]         // results
+///
+/// mix(a,i) = a ^ a << 13   if i = 0 mod 4
+///            a ^ a >>  6   if i = 1 mod 4
+///            a ^ a <<  2   if i = 2 mod 4
+///            a ^ a >> 16   if i = 3 mod 4
+///
+/// c = c + 1
+/// b = b + c
+///
+/// for i in 0..256 {
+///     x = s[i]
+///     a = mix(a,i) + s[i+128 mod 256]
+///     y = a + b + s[x>>2 mod 256]
+///     s[i] = y
+///     b = x + s[y>>10 mod 256]
+///     r[i] = b
+/// }
+/// ```
+///
+/// Numbers are generated in blocks of 256. This means the function above only
+/// runs once every 256 times you ask for a next random number. In all other
+/// circumstances the next unread element of the results array is returned.
+///
+/// ISAAC therefore needs a lot of memory, relative to other non-crypto RNGs.
+/// 2 * 256 * 4 bytes = 2 KiB to hold the state and results.
+///
+/// ## References
+/// [1]: Bob Jenkins, [*ISAAC: A fast cryptographic random number generator*]
+///      (http://burtleburtle.net/bob/rand/isaacafa.html)
+///
+/// [2]: Bob Jenkins, [*ISAAC and RC4*]
+///      (http://burtleburtle.net/bob/rand/isaac.html)
+///
+/// [3]: Jean-Philippe Aumasson, [*On the pseudo-random generator ISAAC*]
+///      (http://eprint.iacr.org/2006/438)
+pub struct IsaacRng {
+    rsl: [u32; RAND_SIZE],
+    mem: [w32; RAND_SIZE],
+    a: w32,
+    b: w32,
+    c: w32,
+    index: u32,
+}
+
+// Cannot be derived because [u32; 256] does not implement Clone
+// FIXME: remove once RFC 2000 gets implemented
+impl Clone for IsaacRng {
+    fn clone(&self) -> IsaacRng {
+        IsaacRng {
+            rsl: self.rsl,
+            mem: self.mem,
+            a: self.a,
+            b: self.b,
+            c: self.c,
+            index: self.index,
+        }
+    }
+}
+
+// Custom Debug implementation that does not expose the internal state
+impl fmt::Debug for IsaacRng {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "IsaacRng {{}}")
+    }
+}
+
+impl IsaacRng {
+    /// Create an ISAAC random number generator using the default
+    /// fixed seed.
+    pub fn new_unseeded() -> IsaacRng {
+        Self::new_from_u64(0)
+    }
+
+    /// Creates an ISAAC random number generator using a `u64` as seed.
+    /// If `seed == 0` this will produce the same stream of random numbers as
+    /// the reference implementation when used unseeded.
+    pub fn new_from_u64(seed: u64) -> IsaacRng {
+        let mut key = [w(0); RAND_SIZE];
+        key[0] = w(seed as u32);
+        key[1] = w((seed >> 32) as u32);
+        // Initialize with only one pass.
+        // A second pass does not improve the quality here, because all of
+        // the seed was already available in the first round.
+        // Not doing the second pass has the small advantage that if `seed == 0`
+        // this method produces exactly the same state as the reference
+        // implementation when used unseeded.
+        init(key, 1)
+    }
+
+    /// Refills the output buffer (`self.rsl`)
+    /// See also the pseudocode description of the algorithm at the top of this
+    /// file.
+    ///
+    /// Optimisations used (similar to the reference implementation):
+    /// - The loop is unrolled 4 times, once for every constant of mix().
+    /// - The contents of the main loop are moved to a function `rngstep`, to
+    ///   reduce code duplication.
+    /// - We use local variables for a and b, which helps with optimisations.
+    /// - We split the main loop in two, one that operates over 0..128 and one
+    ///   over 128..256. This way we can optimise out the addition and modulus
+    ///   from `s[i+128 mod 256]`.
+    /// - We maintain one index `i` and add `m` or `m2` as base (m2 for the
+    ///   `s[i+128 mod 256]`), relying on the optimizer to turn it into pointer
+    ///   arithmetic.
+    /// - We fill `rsl` backwards. The reference implementation reads values
+    ///   from `rsl` in reverse. We read them in the normal direction, to make
+    ///   `fill_bytes` a memcopy. To maintain compatibility we fill in reverse.
+    fn isaac(&mut self) {
+        self.c += w(1);
+        // abbreviations
+        let mut a = self.a;
+        let mut b = self.b + self.c;
+        const MIDPOINT: usize = RAND_SIZE / 2;
+
+        #[inline]
+        fn ind(mem:&[w32; RAND_SIZE], v: w32, amount: usize) -> w32 {
+            let index = (v >> amount).0 as usize % RAND_SIZE;
+            mem[index]
+        }
+
+        #[inline]
+        fn rngstep(ctx: &mut IsaacRng,
+                   mix: w32,
+                   a: &mut w32,
+                   b: &mut w32,
+                   base: usize,
+                   m: usize,
+                   m2: usize) {
+            let x = ctx.mem[base + m];
+            *a = mix + ctx.mem[base + m2];
+            let y = *a + *b + ind(&ctx.mem, x, 2);
+            ctx.mem[base + m] = y;
+            *b = x + ind(&ctx.mem, y, 2 + RAND_SIZE_LEN);
+            ctx.rsl[RAND_SIZE - 1 - base - m] = (*b).0;
+        }
+
+        let mut m = 0;
+        let mut m2 = MIDPOINT;
+        for i in (0..MIDPOINT/4).map(|i| i * 4) {
+            rngstep(self, a ^ (a << 13), &mut a, &mut b, i + 0, m, m2);
+            rngstep(self, a ^ (a >> 6 ),  &mut a, &mut b, i + 1, m, m2);
+            rngstep(self, a ^ (a << 2 ),  &mut a, &mut b, i + 2, m, m2);
+            rngstep(self, a ^ (a >> 16),  &mut a, &mut b, i + 3, m, m2);
+        }
+
+        m = MIDPOINT;
+        m2 = 0;
+        for i in (0..MIDPOINT/4).map(|i| i * 4) {
+            rngstep(self, a ^ (a << 13), &mut a, &mut b, i + 0, m, m2);
+            rngstep(self, a ^ (a >> 6 ),  &mut a, &mut b, i + 1, m, m2);
+            rngstep(self, a ^ (a << 2 ),  &mut a, &mut b, i + 2, m, m2);
+            rngstep(self, a ^ (a >> 16),  &mut a, &mut b, i + 3, m, m2);
+        }
+
+        self.a = a;
+        self.b = b;
+        self.index = 0;
+    }
+}
+
+impl Rng for IsaacRng {
+    #[inline]
+    fn next_u32(&mut self) -> u32 {
+        // Using a local variable for `index`, and checking the size avoids a
+        // bounds check later on.
+        let mut index = self.index as usize;
+        if index >= RAND_SIZE {
+            self.isaac();
+            index = 0;
+        }
+
+        let value = self.rsl[index];
+        self.index += 1;
+        value
+    }
+
+    #[inline]
+    fn next_u64(&mut self) -> u64 {
+        impls::next_u64_via_u32(self)
+    }
+
+
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        let mut read_len = 0;
+        while read_len < dest.len() {
+            if self.index as usize >= RAND_SIZE {
+                self.isaac();
+            }
+
+            let (consumed_u32, filled_u8) =
+                impls::fill_via_u32_chunks(&mut self.rsl[(self.index as usize)..],
+                                           &mut dest[read_len..]);
+
+            self.index += consumed_u32 as u32;
+            read_len += filled_u8;
+        }
+    }
+}
+
+/// Creates a new ISAAC random number generator.
+///
+/// The author Bob Jenkins describes how to best initialize ISAAC here:
+/// https://rt.cpan.org/Public/Bug/Display.html?id=64324
+/// The answer is included here just in case:
+///
+/// "No, you don't need a full 8192 bits of seed data. Normal key sizes will do
+/// fine, and they should have their expected strength (eg a 40-bit key will
+/// take as much time to brute force as 40-bit keys usually will). You could
+/// fill the remainder with 0, but set the last array element to the length of
+/// the key provided (to distinguish keys that differ only by different amounts
+/// of 0 padding). You do still need to call randinit() to make sure the initial
+/// state isn't uniform-looking."
+/// "After publishing ISAAC, I wanted to limit the key to half the size of r[],
+/// and repeat it twice. That would have made it hard to provide a key that sets
+/// the whole internal state to anything convenient. But I'd already published
+/// it."
+///
+/// And his answer to the question "For my code, would repeating the key over
+/// and over to fill 256 integers be a better solution than zero-filling, or
+/// would they essentially be the same?":
+/// "If the seed is under 32 bytes, they're essentially the same, otherwise
+/// repeating the seed would be stronger. randinit() takes a chunk of 32 bytes,
+/// mixes it, and combines that with the next 32 bytes, et cetera. Then loops
+/// over all the elements the same way a second time."
+#[inline]
+fn init(mut mem: [w32; RAND_SIZE], rounds: u32) -> IsaacRng {
+    // These numbers are the result of initializing a...h with the
+    // fractional part of the golden ratio in binary (0x9e3779b9)
+    // and applying mix() 4 times.
+    let mut a = w(0x1367df5a);
+    let mut b = w(0x95d90059);
+    let mut c = w(0xc3163e4b);
+    let mut d = w(0x0f421ad8);
+    let mut e = w(0xd92a4a78);
+    let mut f = w(0xa51a3c49);
+    let mut g = w(0xc4efea1b);
+    let mut h = w(0x30609119);
+
+    // Normally this should do two passes, to make all of the seed affect all
+    // of `mem`
+    for _ in 0..rounds {
+        for i in (0..RAND_SIZE/8).map(|i| i * 8) {
+            a += mem[i  ]; b += mem[i+1];
+            c += mem[i+2]; d += mem[i+3];
+            e += mem[i+4]; f += mem[i+5];
+            g += mem[i+6]; h += mem[i+7];
+            mix(&mut a, &mut b, &mut c, &mut d,
+                &mut e, &mut f, &mut g, &mut h);
+            mem[i  ] = a; mem[i+1] = b;
+            mem[i+2] = c; mem[i+3] = d;
+            mem[i+4] = e; mem[i+5] = f;
+            mem[i+6] = g; mem[i+7] = h;
+        }
+    }
+
+    let mut rng = IsaacRng {
+        rsl: [0; RAND_SIZE],
+        mem: mem,
+        a: w(0),
+        b: w(0),
+        c: w(0),
+        index: 0,
+    };
+
+    // Prepare the first set of results
+    rng.isaac();
+    rng
+}
+
+fn mix(a: &mut w32, b: &mut w32, c: &mut w32, d: &mut w32,
+       e: &mut w32, f: &mut w32, g: &mut w32, h: &mut w32) {
+    *a ^= *b << 11; *d += *a; *b += *c;
+    *b ^= *c >> 2;  *e += *b; *c += *d;
+    *c ^= *d << 8;  *f += *c; *d += *e;
+    *d ^= *e >> 16; *g += *d; *e += *f;
+    *e ^= *f << 10; *h += *e; *f += *g;
+    *f ^= *g >> 4;  *a += *f; *g += *h;
+    *g ^= *h << 8;  *b += *g; *h += *a;
+    *h ^= *a >> 9;  *c += *h; *a += *b;
+}
+
+impl Rand for IsaacRng {
+    fn rand<R: Rng>(other: &mut R) -> IsaacRng {
+        let mut key = [w(0); RAND_SIZE];
+        unsafe {
+            let ptr = key.as_mut_ptr() as *mut u8;
+
+            let slice = slice::from_raw_parts_mut(ptr, RAND_SIZE * 4);
+            other.fill_bytes(slice);
+        }
+
+        init(key, 2)
+    }
+}
+
+impl<'a> SeedableRng<&'a [u32]> for IsaacRng {
+    /// Replaces the whole state with that of a generator built from `seed`.
+    fn reseed(&mut self, seed: &'a [u32]) {
+        *self = IsaacRng::from_seed(seed);
+    }
+
+    /// Builds an ISAAC generator from a `u32` slice of any length.
+    ///
+    /// At most the first 256 elements are used; any further ones are silently
+    /// ignored, and a shorter seed is padded with zeros. Generators built
+    /// from equal seeds produce identical output sequences.
+    fn from_seed(seed: &'a [u32]) -> IsaacRng {
+        let mut key = [w(0); RAND_SIZE];
+
+        // `key` becomes [seed[0], ..., seed[seed.len() - 1], 0, 0, ...]; `zip`
+        // stops as soon as every slot of `key` has been written.
+        let padded = seed.iter().cloned().chain(repeat(0u32));
+        for (slot, word) in key.iter_mut().zip(padded) {
+            *slot = w(word);
+        }
+        init(key, 2)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use {Rng, SeedableRng};
+    use super::IsaacRng;
+
+    #[test]
+    fn test_rng_32_rand_seeded() {
+        let s = ::test::rng().gen_iter::<u32>().take(256).collect::<Vec<u32>>();
+        let mut ra: IsaacRng = SeedableRng::from_seed(&s[..]);
+        let mut rb: IsaacRng = SeedableRng::from_seed(&s[..]);
+        assert!(::test::iter_eq(ra.gen_ascii_chars().take(100),
+                                rb.gen_ascii_chars().take(100)));
+    }
+
+    #[test]
+    fn test_rng_32_seeded() {
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut ra: IsaacRng = SeedableRng::from_seed(seed);
+        let mut rb: IsaacRng = SeedableRng::from_seed(seed);
+        assert!(::test::iter_eq(ra.gen_ascii_chars().take(100),
+                                rb.gen_ascii_chars().take(100)));
+    }
+
+    #[test]
+    fn test_rng_32_reseed() {
+        let s = ::test::rng().gen_iter::<u32>().take(256).collect::<Vec<u32>>();
+        let mut r: IsaacRng = SeedableRng::from_seed(&s[..]);
+        let string1: String = r.gen_ascii_chars().take(100).collect();
+
+        r.reseed(&s[..]);
+
+        let string2: String = r.gen_ascii_chars().take(100).collect();
+        assert_eq!(string1, string2);
+    }
+
+    #[test]
+    fn test_rng_32_true_values() {
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut ra: IsaacRng = SeedableRng::from_seed(seed);
+        // Regression test that isaac is actually using the above vector
+        let v = (0..10).map(|_| ra.next_u32()).collect::<Vec<_>>();
+        assert_eq!(v,
+                   vec!(2558573138, 873787463, 263499565, 2103644246, 3595684709,
+                        4203127393, 264982119, 2765226902, 2737944514, 3900253796));
+
+        let seed: &[_] = &[12345, 67890, 54321, 9876];
+        let mut rb: IsaacRng = SeedableRng::from_seed(seed);
+        // skip forward to the 10000th number
+        for _ in 0..10000 { rb.next_u32(); }
+
+        let v = (0..10).map(|_| rb.next_u32()).collect::<Vec<_>>();
+        assert_eq!(v,
+                   vec!(3676831399, 3183332890, 2834741178, 3854698763, 2717568474,
+                        1576568959, 3507990155, 179069555, 141456972, 2478885421));
+    }
+
+    #[test]
+    fn test_rng_64_true_values() {
+        // Same seed as `test_rng_32_true_values`; each u64 pairs two consecutive u32 outputs, low word first
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut ra: IsaacRng = SeedableRng::from_seed(seed);
+        // Regression test that isaac is actually using the above vector
+        let v = (0..5).map(|_| ra.next_u64()).collect::<Vec<_>>();
+        assert_eq!(v,
+                   vec!(3752888579798383186, 9035083239252078381, 18052294697452424037, 11876559110374379111, 16751462502657800130));
+    }
+
+    #[test]
+    fn test_isaac_true_bytes() {
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut rng1 = IsaacRng::from_seed(seed);
+        let mut buf = [0u8; 32];
+        rng1.fill_bytes(&mut buf);
+        // Same as the first values in `test_rng_32_true_values`, as bytes in LE order
+        assert_eq!(buf,
+                   [82, 186, 128, 152, 71, 240, 20, 52,
+                    45, 175, 180, 15, 86, 16, 99, 125,
+                    101, 203, 81, 214, 97, 162, 134, 250,
+                    103, 78, 203, 15, 150, 3, 210, 164]);
+    }
+
+    #[test]
+    fn test_isaac_new_uninitialized() {
+        // Compare the results from initializing `IsaacRng` with
+        // `new_from_u64(0)`, to make sure it is the same as the reference
+        // implementation when used uninitialized.
+        // Note: We only test the first 16 integers, not the full 256 of the
+        // first block.
+        let mut rng = IsaacRng::new_from_u64(0);
+        let vec = (0..16).map(|_| rng.next_u32()).collect::<Vec<_>>();
+        let expected: [u32; 16] = [
+            0x71D71FD2, 0xB54ADAE7, 0xD4788559, 0xC36129FA,
+            0x21DC1EA9, 0x3CB879CA, 0xD83B237F, 0xFA3CE5BD,
+            0x8D048509, 0xD82E9489, 0xDB452848, 0xCA20E846,
+            0x500F972E, 0x0EEFF940, 0x00D6B993, 0xBC12C17F];
+        assert_eq!(vec, expected);
+    }
+}
diff --git a/src/prng/isaac64.rs b/src/prng/isaac64.rs
new file mode 100644
index 00000000..f95e8e84
--- /dev/null
+++ b/src/prng/isaac64.rs
@@ -0,0 +1,487 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! The ISAAC-64 random number generator.
+
+use core::slice;
+use core::iter::repeat;
+use core::num::Wrapping as w;
+use core::fmt;
+
+use {Rng, SeedableRng, Rand};
+use impls;
+
+#[allow(non_camel_case_types)]
+type w64 = w<u64>;
+
+const RAND_SIZE_LEN: usize = 8;
+const RAND_SIZE: usize = 1 << RAND_SIZE_LEN;
+
+/// A random number generator that uses ISAAC-64, the 64-bit variant of the
+/// ISAAC algorithm.
+///
+/// ISAAC stands for "Indirection, Shift, Accumulate, Add, and Count" which are
+/// the principal bitwise operations employed. It is the most advanced of a
+/// series of array based random number generators designed by Robert Jenkins
+/// in 1996[1].
+///
+/// Although ISAAC is designed to be cryptographically secure, its design is not
+/// founded in cryptographic theory. Therefore it is _not recommended for_
+/// cryptographic purposes. It is however one of the strongest non-cryptographic
+/// RNGs, while still being reasonably fast.
+///
+/// ISAAC-64 is mostly similar to ISAAC. Because it operates on 64-bit integers
+/// instead of 32-bit, it uses twice as much memory to hold its state and
+/// results. Also it uses different constants for shifts and indirect indexing,
+/// optimized to give good results for 64bit arithmetic.
+///
+/// ## Overview of the ISAAC-64 algorithm:
+/// (in pseudo-code)
+///
+/// ```text
+/// Input: a, b, c, s[256] // state
+/// Output: r[256] // results
+///
+/// mix(a,i) = !(a ^ a << 21)  if i = 0 mod 4
+///              a ^ a >>  5   if i = 1 mod 4
+///              a ^ a << 12   if i = 2 mod 4
+///              a ^ a >> 33   if i = 3 mod 4
+///
+/// c = c + 1
+/// b = b + c
+///
+/// for i in 0..256 {
+///     x = s[i]
+///     a = mix(a,i) + s[i+128 mod 256]
+///     y = a + b + s[x>>3 mod 256]
+///     s[i] = y
+///     b = x + s[y>>11 mod 256]
+///     r[i] = b
+/// }
+/// ```
+///
+/// For more information, see the description of `rand::prng::IsaacRng`.
+///
+/// [1]: Bob Jenkins, [*ISAAC and RC4*]
+///      (http://burtleburtle.net/bob/rand/isaac.html)
+pub struct Isaac64Rng {
+    rsl: [u64; RAND_SIZE],
+    mem: [w64; RAND_SIZE],
+    a: w64,
+    b: w64,
+    c: w64,
+    index: u32,
+    half_used: bool, // true if only half of the previous result is used
+}
+
+// Written by hand: `derive(Clone)` fails because [u64; 256] does not implement Clone
+// FIXME: remove once RFC 2000 gets implemented
+impl Clone for Isaac64Rng {
+    fn clone(&self) -> Isaac64Rng {
+        // Every field is `Copy`, so copy them out of `self` and reassemble them.
+        let Isaac64Rng { rsl, mem, a, b, c, index, half_used } = *self;
+        Isaac64Rng {
+            rsl: rsl,
+            mem: mem,
+            a: a, b: b, c: c,
+            index: index,
+            half_used: half_used,
+        }
+    }
+}
+
+// Custom Debug implementation that prints only the type name, never the state
+impl fmt::Debug for Isaac64Rng {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_fmt(format_args!("Isaac64Rng {{}}"))
+    }
+}
+
+impl Isaac64Rng {
+    /// Create a 64-bit ISAAC random number generator using the
+    /// default fixed seed.
+    pub fn new_unseeded() -> Isaac64Rng {
+        Self::new_from_u64(0)
+    }
+
+    /// Creates an ISAAC-64 random number generator using a `u64` as seed.
+    /// If `seed == 0` this will produce the same stream of random numbers as
+    /// the reference implementation when used unseeded.
+    pub fn new_from_u64(seed: u64) -> Isaac64Rng {
+        let mut key = [w(0); RAND_SIZE];
+        key[0] = w(seed);
+        // Initialize with only one pass.
+        // A second pass does not improve the quality here, because all of
+        // the seed was already available in the first round.
+        // Not doing the second pass has the small advantage that if `seed == 0`
+        // this method produces exactly the same state as the reference
+        // implementation when used unseeded.
+        init(key, 1)
+    }
+
+    /// Refills the output buffer (`self.rsl`)
+    /// See also the pseudocode description of the algorithm at the top of this
+    /// file.
+    ///
+    /// Optimisations used (similar to the reference implementation):
+    /// - The loop is unrolled 4 times, once for every constant of mix().
+    /// - The contents of the main loop are moved to a function `rngstep`, to
+    ///   reduce code duplication.
+    /// - We use local variables for a and b, which helps with optimisations.
+    /// - We split the main loop in two, one that operates over 0..128 and one
+    ///   over 128..256. This way we can optimise out the addition and modulus
+    ///   from `s[i+128 mod 256]`.
+    /// - We maintain one index `i` and add `m` or `m2` as base (m2 for the
+    ///   `s[i+128 mod 256]`), relying on the optimizer to turn it into pointer
+    ///   arithmetic.
+    /// - We fill `rsl` backwards. The reference implementation reads values
+    ///   from `rsl` in reverse. We read them in the normal direction, to make
+    ///   `fill_bytes` a memcopy. To maintain compatibility we fill in reverse.
+    fn isaac64(&mut self) {
+        self.c += w(1);
+        // abbreviations
+        let mut a = self.a;
+        let mut b = self.b + self.c;
+        const MIDPOINT: usize = RAND_SIZE / 2;
+        // Indirect lookup: `(v >> amount) % RAND_SIZE` selects the element of `mem` to return.
+        #[inline]
+        fn ind(mem:&[w64; RAND_SIZE], v: w64, amount: usize) -> w64 {
+            let index = (v >> amount).0 as usize % RAND_SIZE;
+            mem[index]
+        }
+        // One step: updates `mem[base + m]` using `mem[base + m2]`, and stores the output word into `rsl` from the back.
+        #[inline]
+        fn rngstep(ctx: &mut Isaac64Rng,
+                   mix: w64,
+                   a: &mut w64,
+                   b: &mut w64,
+                   base: usize,
+                   m: usize,
+                   m2: usize) {
+            let x = ctx.mem[base + m];
+            *a = mix + ctx.mem[base + m2];
+            let y = *a + *b + ind(&ctx.mem, x, 3);
+            ctx.mem[base + m] = y;
+            *b = x + ind(&ctx.mem, y, 3 + RAND_SIZE_LEN);
+            ctx.rsl[RAND_SIZE - 1 - base - m] = (*b).0;
+        }
+
+        let mut m = 0;
+        let mut m2 = MIDPOINT;
+        for i in (0..MIDPOINT/4).map(|i| i * 4) {
+            rngstep(self, !(a ^ (a << 21)), &mut a, &mut b, i + 0, m, m2);
+            rngstep(self,   a ^ (a >> 5 ),  &mut a, &mut b, i + 1, m, m2);
+            rngstep(self,   a ^ (a << 12),  &mut a, &mut b, i + 2, m, m2);
+            rngstep(self,   a ^ (a >> 33),  &mut a, &mut b, i + 3, m, m2);
+        }
+        // Second half: the two halves of `mem` swap roles (`m` and `m2` are exchanged).
+        m = MIDPOINT;
+        m2 = 0;
+        for i in (0..MIDPOINT/4).map(|i| i * 4) {
+            rngstep(self, !(a ^ (a << 21)), &mut a, &mut b, i + 0, m, m2);
+            rngstep(self,   a ^ (a >> 5 ),  &mut a, &mut b, i + 1, m, m2);
+            rngstep(self,   a ^ (a << 12),  &mut a, &mut b, i + 2, m, m2);
+            rngstep(self,   a ^ (a >> 33),  &mut a, &mut b, i + 3, m, m2);
+        }
+
+        self.a = a;
+        self.b = b;
+        self.index = 0;
+        self.half_used = false;
+    }
+}
+
+impl Rng for Isaac64Rng {
+    /// Returns the next 32 bits; each `u64` result is handed out as two halves.
+    #[inline]
+    fn next_u32(&mut self) -> u32 {
+        // Using a local variable for `index`, and checking the size avoids a
+        // bounds check later on.
+        let mut index = self.index as usize * 2 - self.half_used as usize;
+        if index >= RAND_SIZE * 2 {
+            self.isaac64();
+            index = 0;
+        }
+        // `self.index` only advances when the first half of a fresh `u64` is taken.
+        self.half_used = !self.half_used;
+        self.index += self.half_used as u32;
+        // Index as if this is a u32 slice.
+        let rsl = unsafe { &*(&mut self.rsl as *mut [u64; RAND_SIZE]
+                                            as *mut [u32; RAND_SIZE * 2]) };
+        if cfg!(target_endian = "little") {
+            rsl[index]
+        } else {
+            rsl[index ^ 1]
+        }
+    }
+
+    /// Returns the next full `u64` result, abandoning any half-used one.
+    #[inline]
+    fn next_u64(&mut self) -> u64 {
+        let mut index = self.index as usize;
+        if index >= RAND_SIZE {
+            self.isaac64();
+            index = 0;
+        }
+        let value = self.rsl[index];
+        self.index += 1;
+        self.half_used = false;
+        value
+    }
+
+    /// Fills `dest` from unused `u64` results, abandoning any half-used one.
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        let mut read_len = 0;
+        while read_len < dest.len() {
+            if self.index as usize >= RAND_SIZE {
+                self.isaac64();
+            }
+            // Without this, a later `next_u32` would re-emit bits already copied here.
+            self.half_used = false;
+            let (consumed_u64, filled_u8) =
+                impls::fill_via_u64_chunks(&mut self.rsl[self.index as usize..],
+                                           &mut dest[read_len..]);
+            self.index += consumed_u64 as u32;
+            read_len += filled_u8;
+        }
+    }
+}
+
+/// Builds an `Isaac64Rng` from the key in `mem`, scrambling the key with
+/// `rounds` mixing passes and then generating the first block of results.
+fn init(mut mem: [w64; RAND_SIZE], rounds: u32) -> Isaac64Rng {
+    // These numbers are the result of initializing a...h with the
+    // fractional part of the golden ratio in binary (0x9e3779b97f4a7c13)
+    // and applying mix() 4 times.
+    let mut a = w(0x647c4677a2884b7c);
+    let mut b = w(0xb9f8b322c73ac862);
+    let mut c = w(0x8c0ea5053d4712a0);
+    let mut d = w(0xb29b2e824a595524);
+    let mut e = w(0x82f053db8355e0ce);
+    let mut f = w(0x48fe4a0fa5a09315);
+    let mut g = w(0xae985bf2cbfc89ed);
+    let mut h = w(0x98f5704f6c44c0ab);
+
+    // Two passes are the norm: they let every part of the seed affect every
+    // element of `mem`.
+    for _ in 0..rounds {
+        for block in mem.chunks_mut(8) {
+            a += block[0]; b += block[1];
+            c += block[2]; d += block[3];
+            e += block[4]; f += block[5];
+            g += block[6]; h += block[7];
+            mix(&mut a, &mut b, &mut c, &mut d,
+                &mut e, &mut f, &mut g, &mut h);
+            block[0] = a; block[1] = b;
+            block[2] = c; block[3] = d;
+            block[4] = e; block[5] = f;
+            block[6] = g; block[7] = h;
+        }
+    }
+
+    let mut rng = Isaac64Rng {
+        rsl: [0; RAND_SIZE],
+        mem: mem,
+        a: w(0),
+        b: w(0),
+        c: w(0),
+        index: 0,
+        half_used: false,
+    };
+    // Prepare the first set of results
+    rng.isaac64();
+    rng
+}
+
+fn mix(a: &mut w64, b: &mut w64, c: &mut w64, d: &mut w64,
+       e: &mut w64, f: &mut w64, g: &mut w64, h: &mut w64) { // mixes all eight words in place; `w64` is `Wrapping<u64>`, so arithmetic wraps
+    *a -= *e; *f ^= *h >> 9;  *h += *a; // each row: a subtraction, an xor with a shifted word, an addition
+    *b -= *f; *g ^= *a << 9;  *a += *b; // the shift direction alternates: right, left, right, ...
+    *c -= *g; *h ^= *b >> 23; *b += *c;
+    *d -= *h; *a ^= *c << 15; *c += *d;
+    *e -= *a; *b ^= *d >> 14; *d += *e;
+    *f -= *b; *c ^= *e << 20; *e += *f;
+    *g -= *c; *d ^= *f >> 17; *f += *g;
+    *h -= *d; *e ^= *g << 14; *g += *h;
+}
+
+impl Rand for Isaac64Rng {
+    /// Builds a generator whose whole key is filled with bytes from `other`.
+    fn rand<R: Rng>(other: &mut R) -> Isaac64Rng {
+        let mut seed_words = [w(0); RAND_SIZE];
+        let raw = seed_words.as_mut_ptr() as *mut u8;
+        // Expose the key as `RAND_SIZE * 8` raw bytes so that a single
+        // `fill_bytes` call can overwrite all of it.
+        let bytes = unsafe { slice::from_raw_parts_mut(raw, RAND_SIZE * 8) };
+        other.fill_bytes(bytes);
+        init(seed_words, 2)
+    }
+}
+
+impl<'a> SeedableRng<&'a [u64]> for Isaac64Rng {
+    /// Replaces the whole state with that of a generator built from `seed`.
+    fn reseed(&mut self, seed: &'a [u64]) {
+        *self = Isaac64Rng::from_seed(seed);
+    }
+
+    /// Builds an ISAAC-64 generator from a `u64` slice of any length.
+    ///
+    /// At most the first 256 elements are used; any further ones are silently
+    /// ignored, and a shorter seed is padded with zeros. Generators built
+    /// from equal seeds produce identical output sequences.
+    fn from_seed(seed: &'a [u64]) -> Isaac64Rng {
+        let mut key = [w(0); RAND_SIZE];
+
+        // `key` becomes [seed[0], ..., seed[seed.len() - 1], 0, 0, ...]; `zip`
+        // stops as soon as every slot of `key` has been written.
+        let padded = seed.iter().cloned().chain(repeat(0u64));
+        for (slot, word) in key.iter_mut().zip(padded) {
+            *slot = w(word);
+        }
+
+        init(key, 2)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use {Rng, SeedableRng};
+    use super::Isaac64Rng;
+
+    #[test]
+    fn test_rng_64_rand_seeded() {
+        let s = ::test::rng().gen_iter::<u64>().take(256).collect::<Vec<u64>>();
+        let mut ra: Isaac64Rng = SeedableRng::from_seed(&s[..]);
+        let mut rb: Isaac64Rng = SeedableRng::from_seed(&s[..]);
+        assert!(::test::iter_eq(ra.gen_ascii_chars().take(100),
+                                rb.gen_ascii_chars().take(100)));
+    }
+
+    #[test]
+    fn test_rng_64_seeded() {
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut ra: Isaac64Rng = SeedableRng::from_seed(seed);
+        let mut rb: Isaac64Rng = SeedableRng::from_seed(seed);
+        assert!(::test::iter_eq(ra.gen_ascii_chars().take(100),
+                                rb.gen_ascii_chars().take(100)));
+    }
+
+    #[test]
+    fn test_rng_64_reseed() {
+        let s = ::test::rng().gen_iter::<u64>().take(256).collect::<Vec<u64>>();
+        let mut r: Isaac64Rng = SeedableRng::from_seed(&s[..]);
+        let string1: String = r.gen_ascii_chars().take(100).collect();
+
+        r.reseed(&s[..]);
+
+        let string2: String = r.gen_ascii_chars().take(100).collect();
+        assert_eq!(string1, string2);
+    }
+
+    #[test]
+    fn test_rng_64_true_values() {
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut ra: Isaac64Rng = SeedableRng::from_seed(seed);
+        // Regression test that isaac is actually using the above vector
+        let v = (0..10).map(|_| ra.next_u64()).collect::<Vec<_>>();
+        assert_eq!(v,
+                   vec!(547121783600835980, 14377643087320773276, 17351601304698403469,
+                        1238879483818134882, 11952566807690396487, 13970131091560099343,
+                        4469761996653280935, 15552757044682284409, 6860251611068737823,
+                        13722198873481261842));
+
+        let seed: &[_] = &[12345, 67890, 54321, 9876];
+        let mut rb: Isaac64Rng = SeedableRng::from_seed(seed);
+        // skip forward to the 10000th number
+        for _ in 0..10000 { rb.next_u64(); }
+
+        let v = (0..10).map(|_| rb.next_u64()).collect::<Vec<_>>();
+        assert_eq!(v,
+                   vec!(18143823860592706164, 8491801882678285927, 2699425367717515619,
+                        17196852593171130876, 2606123525235546165, 15790932315217671084,
+                        596345674630742204, 9947027391921273664, 11788097613744130851,
+                        10391409374914919106));
+    }
+
+    #[test]
+    fn test_isaac64_true_values_32() {
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut rng1 = Isaac64Rng::from_seed(seed);
+        let v = (0..12).map(|_| rng1.next_u32()).collect::<Vec<_>>();
+        // Subset of above values, as an LE u32 sequence
+        assert_eq!(v,
+                   [141028748, 127386717,
+                    1058730652, 3347555894,
+                    851491469, 4039984500,
+                    2692730210, 288449107,
+                    646103879, 2782923823,
+                    4195642895, 3252674613]);
+    }
+
+    #[test]
+    fn test_isaac64_true_values_mixed() {
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut rng = Isaac64Rng::from_seed(seed);
+        // Test alternating between `next_u64` and `next_u32` works as expected.
+        // Values are the same as in `test_rng_64_true_values` and
+        // `test_isaac64_true_values_32`.
+        assert_eq!(rng.next_u64(), 547121783600835980);
+        assert_eq!(rng.next_u32(), 1058730652);
+        assert_eq!(rng.next_u32(), 3347555894);
+        assert_eq!(rng.next_u64(), 17351601304698403469);
+        assert_eq!(rng.next_u32(), 2692730210);
+        // Skip one u32
+        assert_eq!(rng.next_u64(), 11952566807690396487);
+        assert_eq!(rng.next_u32(), 4195642895);
+    }
+
+    #[test]
+    fn test_isaac64_true_bytes() {
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut rng1 = Isaac64Rng::from_seed(seed);
+        let mut buf = [0u8; 32];
+        rng1.fill_bytes(&mut buf);
+        // Same as the first values in `test_rng_64_true_values`, as bytes in LE order
+        assert_eq!(buf,
+                   [140, 237, 103, 8, 93, 196, 151, 7,
+                    156, 242, 26, 63, 54, 166, 135, 199,
+                    141, 186, 192, 50, 116, 69, 205, 240,
+                    98, 205, 127, 160, 83, 98, 49, 17]);
+    }
+
+    #[test]
+    fn test_isaac_new_uninitialized() {
+        // Compare the results from initializing `Isaac64Rng` with
+        // `new_from_u64(0)`, to make sure it is the same as the reference
+        // implementation when used uninitialized.
+        // Note: We only test the first 16 integers, not the full 256 of the
+        // first block.
+        let mut rng = Isaac64Rng::new_from_u64(0);
+        let vec = (0..16).map(|_| rng.next_u64()).collect::<Vec<_>>();
+        let expected: [u64; 16] = [
+            0xF67DFBA498E4937C, 0x84A5066A9204F380, 0xFEE34BD5F5514DBB,
+            0x4D1664739B8F80D6, 0x8607459AB52A14AA, 0x0E78BC5A98529E49,
+            0xFE5332822AD13777, 0x556C27525E33D01A, 0x08643CA615F3149F,
+            0xD0771FAF3CB04714, 0x30E86F68A37B008D, 0x3074EBC0488A3ADF,
+            0x270645EA7A2790BC, 0x5601A0A8D3763C6A, 0x2F83071F53F325DD,
+            0xB9090F3D42D2D2EA];
+        assert_eq!(vec, expected);
+    }
+
+    #[test]
+    fn test_rng_clone() {
+        let seed: &[_] = &[1, 23, 456, 7890, 12345];
+        let mut rng: Isaac64Rng = SeedableRng::from_seed(seed);
+        let mut clone = rng.clone();
+        for _ in 0..16 {
+            assert_eq!(rng.next_u64(), clone.next_u64());
+        }
+    }
+}
diff --git a/src/prng/mod.rs b/src/prng/mod.rs
new file mode 100644
index 00000000..69f5d8c3
--- /dev/null
+++ b/src/prng/mod.rs
@@ -0,0 +1,51 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Pseudo random number generators are algorithms to produce *apparently
+//! random* numbers deterministically, and usually fairly quickly.
+//!
+//! So long as the algorithm is computationally secure, is initialised with
+//! sufficient entropy (i.e. unknown by an attacker), and its internal state is
+//! also protected (unknown to an attacker), the output will also be
+//! *computationally secure*. Computationally Secure Pseudo Random Number
+//! Generators (CSPRNGs) are thus suitable sources of random numbers for
+//! cryptography. There are a couple of gotchas here, however. First, the seed
+//! used for initialisation must be unknown. Usually this should be provided by
+//! the operating system and should usually be secure, however this may not
+//! always be the case (especially soon after startup). Second, user-space
+//! memory may be vulnerable, for example when written to swap space, and a
+//! forked child process should reinitialise any user-space PRNGs. For this
+//! reason it may be preferable to source random numbers directly from the OS
+//! for cryptographic applications.
+//!
+//! PRNGs are also widely used for non-cryptographic uses: randomised
+//! algorithms, simulations, games. In these applications it is usually not
+//! important for numbers to be cryptographically *unguessable*, but even
+//! distribution and independence from other samples (from the point of view
+//! of someone unaware of the algorithm used, at least) may still be important.
+//! Good PRNGs should satisfy these properties, but do not take them for
+//! granted; Wikipedia's article on
+//! [Pseudorandom number generators](https://en.wikipedia.org/wiki/Pseudorandom_number_generator)
+//! provides some background on this topic.
+//!
+//! Care should be taken when seeding (initialising) PRNGs. Some PRNGs have
+//! short periods for some seeds. If one PRNG is seeded from another using the
+//! same algorithm, it is possible that both will yield the same sequence of
+//! values (with some lag).
+
+mod chacha;
+mod isaac;
+mod isaac64;
+mod xorshift;
+
+pub use self::chacha::ChaChaRng;
+pub use self::isaac::IsaacRng;
+pub use self::isaac64::Isaac64Rng;
+pub use self::xorshift::XorShiftRng;
diff --git a/src/prng/xorshift.rs b/src/prng/xorshift.rs
new file mode 100644
index 00000000..8d977569
--- /dev/null
+++ b/src/prng/xorshift.rs
@@ -0,0 +1,117 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Xorshift generators
+
+use core::num::Wrapping as w;
+use core::fmt;
+use {Rng, SeedableRng, Rand};
+use impls;
+
+/// An Xorshift[1] random number generator. Its state is four 32-bit
+/// words, which the seeding functions do not allow to be all zero.
+///
+/// The Xorshift algorithm is not suitable for cryptographic purposes
+/// but is very fast. If you do not know for sure that it fits your
+/// requirements, use a more secure one such as `IsaacRng` or `OsRng`.
+///
+/// [1]: Marsaglia, George (July 2003). ["Xorshift
+/// RNGs"](https://www.jstatsoft.org/v08/i14/paper). *Journal of
+/// Statistical Software*. Vol. 8 (Issue 14).
+#[derive(Clone)]
+pub struct XorShiftRng {
+    x: w<u32>,
+    y: w<u32>,
+    z: w<u32>,
+    w: w<u32>,
+}
+
+// Custom Debug implementation that prints only the type name, never the state
+impl fmt::Debug for XorShiftRng {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_fmt(format_args!("XorShiftRng {{}}"))
+    }
+}
+
+impl XorShiftRng {
+    /// Creates a new XorShiftRng instance which is not seeded.
+    ///
+    /// The state is initialised from fixed constants, so every generator made
+    /// by this function yields the same stream of random numbers. Creating the
+    /// generator through `SeedableRng` is highly recommended instead.
+    pub fn new_unseeded() -> XorShiftRng {
+        const DEFAULT_XYZW: [u32; 4] = [0x193a6754, 0xa8a7d469, 0x97830e05, 0x113ba7bb];
+        XorShiftRng {
+            x: w(DEFAULT_XYZW[0]),
+            y: w(DEFAULT_XYZW[1]),
+            z: w(DEFAULT_XYZW[2]),
+            w: w(DEFAULT_XYZW[3]),
+        }
+    }
+}
+
+impl Rng for XorShiftRng {
+    /// Advances the four-word state by one step and returns the new `w` word.
+    #[inline]
+    fn next_u32(&mut self) -> u32 {
+        // `t` is derived from the oldest word before the words shift down a slot.
+        let t = self.x ^ (self.x << 11);
+        self.x = self.y;
+        self.y = self.z;
+        self.z = self.w;
+        self.w = self.w ^ (self.w >> 19) ^ (t ^ (t >> 8));
+        self.w.0
+    }
+
+    fn next_u64(&mut self) -> u64 {
+        impls::next_u64_via_u32(self)
+    }
+
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        impls::fill_bytes_via_u32(self, dest)
+    }
+}
+
+impl SeedableRng<[u32; 4]> for XorShiftRng {
+    /// Replaces the state of an XorShiftRng with the four words of `seed`.
+    /// This will panic if `seed` is entirely 0.
+    fn reseed(&mut self, seed: [u32; 4]) {
+        // Reject the all-zero seed: at least one word has to be non-zero.
+        assert!(seed.iter().any(|&word| word != 0),
+                "XorShiftRng.reseed called with an all zero seed.");
+
+        *self = XorShiftRng {
+            x: w(seed[0]), y: w(seed[1]), z: w(seed[2]), w: w(seed[3]),
+        };
+    }
+
+    /// Create a new XorShiftRng. This will panic if `seed` is entirely 0.
+    fn from_seed(seed: [u32; 4]) -> XorShiftRng {
+        // Reject the all-zero seed: at least one word has to be non-zero.
+        assert!(seed.iter().any(|&word| word != 0),
+                "XorShiftRng::from_seed called with an all zero seed.");
+
+        XorShiftRng {
+            x: w(seed[0]), y: w(seed[1]),
+            z: w(seed[2]), w: w(seed[3]),
+        }
+    }
+}
+
+impl Rand for XorShiftRng {
+    fn rand<R: Rng>(rng: &mut R) -> XorShiftRng {
+        // Draw four words from `rng`, redrawing for as long as all of them are zero.
+        loop {
+            let (x, y, z, w_): (u32, u32, u32, u32) = rng.gen();
+            if (x, y, z, w_) == (0, 0, 0, 0) { continue; }
+            return XorShiftRng { x: w(x), y: w(y), z: w(z), w: w(w_) };
+        }
+    }
+}
diff --git a/src/rand_impls.rs b/src/rand_impls.rs
index 704e38bf..d734aa38 100644
--- a/src/rand_impls.rs
+++ b/src/rand_impls.rs
@@ -10,8 +10,7 @@
 
 //! The implementations of `Rand` for the built-in types.
 
-use std::char;
-use std::mem;
+use core::{char, mem};
 
 use {Rand,Rng};
 
@@ -249,6 +248,7 @@ impl<T:Rand> Rand for Option<T> {
 
 #[cfg(test)]
 mod tests {
+    use impls;
     use {Rng, thread_rng, Open01, Closed01};
 
     struct ConstantRng(u64);
@@ -261,6 +261,10 @@ mod tests {
             let ConstantRng(v) = *self;
             v
         }
+
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            impls::fill_bytes_via_u64(self, dest)
+        }
     }
 
     #[test]
diff --git a/src/reseeding.rs b/src/reseeding.rs
index 88458a3c..93bc3252 100644
--- a/src/reseeding.rs
+++ b/src/reseeding.rs
@@ -11,7 +11,7 @@
 //! A wrapper around another RNG that reseeds it after it
 //! generates a certain number of random bytes.
 
-use std::default::Default;
+use core::default::Default;
 
 use {Rng, SeedableRng};
 
@@ -147,6 +147,7 @@ impl Default for ReseedWithDefault {
 
 #[cfg(test)]
 mod test {
+    use impls;
     use std::default::Default;
     use std::iter::repeat;
     use super::{ReseedingRng, ReseedWithDefault};
@@ -162,6 +163,13 @@ mod test {
             // very random
             self.i - 1
         }
+        fn next_u64(&mut self) -> u64 {
+            impls::next_u64_via_u32(self)
+        }
+
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            impls::fill_bytes_via_u64(self, dest)
+        }
     }
     impl Default for Counter {
         fn default() -> Counter {
diff --git a/src/seq.rs b/src/seq.rs
new file mode 100644
index 00000000..a7889fe3
--- /dev/null
+++ b/src/seq.rs
@@ -0,0 +1,337 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Functions for randomly accessing and sampling sequences.
+
+use super::Rng;
+
+// This module is only enabled when either std or alloc is available.
+// BTreeMap is not as fast in tests, but better than nothing.
+#[cfg(feature="std")] use std::collections::HashMap;
+#[cfg(not(feature="std"))] use alloc::btree_map::BTreeMap;
+
+#[cfg(not(feature="std"))] use alloc::Vec;
+
+/// Randomly sample `amount` elements from a finite iterator.
+///
+/// The following can be returned:
+/// - `Ok`: `Vec` of `amount` non-repeating randomly sampled elements. The order is not random.
+/// - `Err`: `Vec` of all the elements from `iterable` in sequential order. This happens when the
+///   length of `iterable` was less than `amount`. This is considered an error since exactly
+///   `amount` elements are typically expected.
+///
+/// This implementation uses `O(len(iterable))` time and `O(amount)` memory.
+///
+/// # Example
+///
+/// ```rust
+/// use rand::{thread_rng, seq};
+///
+/// let mut rng = thread_rng();
+/// let sample = seq::sample_iter(&mut rng, 1..100, 5).unwrap();
+/// println!("{:?}", sample);
+/// ```
+pub fn sample_iter<T, I, R>(rng: &mut R, iterable: I, amount: usize) -> Result<Vec<T>, Vec<T>>
+    where I: IntoIterator<Item=T>,
+          R: Rng,
+{
+    let mut iter = iterable.into_iter();
+    let mut reservoir = Vec::with_capacity(amount);
+    reservoir.extend(iter.by_ref().take(amount));
+
+    // Continue unless the iterator was exhausted while filling the reservoir.
+    //
+    // note: this prevents iterators that "restart" from causing problems.
+    // If the iterator stops once, then so do we.
+    if reservoir.len() == amount {
+        for (i, elem) in iter.enumerate() {
+            let k = rng.gen_range(0, i + 1 + amount);
+            if let Some(spot) = reservoir.get_mut(k) {
+                *spot = elem;
+            }
+        }
+        Ok(reservoir)
+    } else {
+        // Don't hang onto extra memory. There is a corner case where
+        // `amount` was much greater than `len(iterable)`.
+        reservoir.shrink_to_fit();
+        Err(reservoir)
+    }
+}
+
+/// Clone `amount` randomly chosen elements out of `slice`.
+///
+/// No position of `slice` is picked more than once; the result is in random order.
+///
+/// Uses `O(amount)` time and memory. Panics if `amount > slice.len()`.
+///
+/// # Example
+///
+/// ```rust
+/// use rand::{thread_rng, seq};
+///
+/// let mut rng = thread_rng();
+/// let values = vec![5, 6, 1, 3, 4, 6, 7];
+/// println!("{:?}", seq::sample_slice(&mut rng, &values, 3));
+/// ```
+pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T>
+    where R: Rng,
+          T: Clone
+{
+    let picked = sample_indices(rng, slice.len(), amount);
+
+    let mut sampled = Vec::with_capacity(amount);
+    for &idx in &picked {
+        sampled.push(slice[idx].clone());
+    }
+    sampled
+}
+
+/// Borrow `amount` randomly chosen elements of `slice`.
+///
+/// No position of `slice` is picked more than once; the result is in random order.
+///
+/// Uses `O(amount)` time and memory. Panics if `amount > slice.len()`.
+///
+/// # Example
+///
+/// ```rust
+/// use rand::{thread_rng, seq};
+///
+/// let mut rng = thread_rng();
+/// let values = vec![5, 6, 1, 3, 4, 6, 7];
+/// println!("{:?}", seq::sample_slice_ref(&mut rng, &values, 3));
+/// ```
+pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T>
+    where R: Rng
+{
+    let picked = sample_indices(rng, slice.len(), amount);
+
+    let mut borrowed = Vec::with_capacity(amount);
+    for &idx in &picked {
+        borrowed.push(&slice[idx]);
+    }
+    borrowed
+}
+
+/// Randomly sample exactly `amount` indices from `0..length`.
+///
+/// The values are non-repeating and in random order.
+///
+/// This implementation uses `O(amount)` time and memory.
+///
+/// This method is used internally by the slice sampling methods, but it can sometimes be useful to
+/// have the indices themselves so this is provided as an alternative.
+///
+/// Panics if `amount > length`
+pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize>
+    where R: Rng,
+{
+    if amount > length {
+        panic!("`amount` must be less than or equal to `slice.len()`");
+    }
+
+    // We are going to have to allocate at least `amount` for the output no matter what. However,
+    // if we use the `cached` version we will have to allocate `amount` as a HashMap as well since
+    // it inserts an element for every loop.
+    //
+    // Therefore, if `amount >= length / 2` then inplace will be both faster and use less memory.
+    // In fact, benchmarks show the inplace version is faster for length up to about 20 times
+    // larger than amount, hence the `length / 20` threshold below.
+    //
+    // TODO: there is probably even more fine-tuning that can be done here since
+    // `HashMap::with_capacity(amount)` probably allocates more than `amount` in practice,
+    // and a trade off could probably be made between memory/cpu, since hashmap operations
+    // are slower than array index swapping.
+    if amount >= length / 20 {
+        sample_indices_inplace(rng, length, amount)
+    } else {
+        sample_indices_cache(rng, length, amount)
+    }
+}
+
+/// Sample an amount of indices using an inplace partial fisher yates method.
+///
+/// This allocates the entire `length` of indices and randomizes only the first `amount`.
+/// It then truncates to `amount` and returns.
+///
+/// `sample_indices` prefers this over the HashMap "cache" whenever `amount >= length / 20`,
+/// since it does not require allocating an extra cache and is much faster.
+fn sample_indices_inplace<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize>
+    where R: Rng,
+{
+    debug_assert!(amount <= length);
+    let mut indices: Vec<usize> = Vec::with_capacity(length);
+    indices.extend(0..length);
+    for i in 0..amount {
+        let j: usize = rng.gen_range(i, length);
+        // Move a randomly drawn not-yet-placed index into slot `i`; slots before `i`
+        // are final and are never touched again because `j >= i`.
+        indices.swap(i, j);
+    }
+    indices.truncate(amount);
+    debug_assert_eq!(indices.len(), amount);
+    indices
+}
+
+
+/// Partial Fisher-Yates over the virtual sequence `0..length`, storing only displaced
+/// entries in a map (a `HashMap` with the `std` feature, a `BTreeMap` without it).
+///
+/// The map avoids allocating the entire `length` of values. This is especially useful when
+/// `amount <<< length`, e.g. select 3 non-repeating from 1_000_000
+fn sample_indices_cache<R>(
+    rng: &mut R,
+    length: usize,
+    amount: usize,
+) -> Vec<usize>
+    where R: Rng,
+{
+    debug_assert!(amount <= length);
+    #[cfg(feature="std")] let mut cache = HashMap::with_capacity(amount);
+    #[cfg(not(feature="std"))] let mut cache = BTreeMap::new();
+    let mut picked = Vec::with_capacity(amount);
+    for pos in 0..amount {
+        let target: usize = rng.gen_range(pos, length);
+
+        // Virtual `slice[pos]`: a key absent from the map still holds its own index.
+        let at_pos = if let Some(&v) = cache.get(&pos) {
+            v
+        } else {
+            pos
+        };
+        // Virtual `slice[target]`, the value the inplace version swaps into `pos`.
+        let at_target = if let Some(&v) = cache.get(&target) {
+            v
+        } else {
+            target
+        };
+
+        // Virtual `slice[target] = slice[pos]`.
+        cache.insert(target, at_pos);
+        // Key `pos` is never read again (later draws are all `> pos`), so emit its new value.
+        picked.push(at_target);
+    }
+    debug_assert_eq!(picked.len(), amount);
+    picked
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use {thread_rng, XorShiftRng, SeedableRng};
+
+    #[test]
+    fn test_sample_iter() {
+        // Covers both outcomes: `Ok` for a small sample, `Err` when too much is requested.
+        let min_val = 1;
+        let max_val = 100;
+
+        let mut r = thread_rng();
+        let vals = (min_val..max_val).collect::<Vec<i32>>();
+        let small_sample = sample_iter(&mut r, vals.iter(), 5).unwrap();
+        let large_sample = sample_iter(&mut r, vals.iter(), vals.len() + 5).unwrap_err();
+
+        assert_eq!(small_sample.len(), 5);
+        assert_eq!(large_sample.len(), vals.len());
+        // when amount > len every element comes back untouched, in iteration order
+        assert_eq!(large_sample, vals.iter().collect::<Vec<_>>());
+
+        // `min_val..max_val` is half-open, so `max_val` itself can never be sampled
+        assert!(small_sample.iter().all(|e| **e >= min_val && **e < max_val));
+    }
+    #[test]
+    fn test_sample_slice_boundaries() {
+        let empty: &[u8] = &[];
+
+        let mut r = thread_rng();
+
+        // sample 0 items
+        assert_eq!(sample_slice(&mut r, empty, 0), vec![]);
+        assert_eq!(sample_slice(&mut r, &[42, 2, 42], 0), vec![]);
+
+        // sample 1 item
+        assert_eq!(sample_slice(&mut r, &[42], 1), vec![42]);
+        let v = sample_slice(&mut r, &[1, 42], 1)[0];
+        assert!(v == 1 || v == 42);
+
+        // sample "all" the items
+        let v = sample_slice(&mut r, &[42, 133], 2);
+        assert!(v == vec![42, 133] || v == vec![133, 42]);
+
+        assert_eq!(sample_indices_inplace(&mut r, 0, 0), vec![]);
+        assert_eq!(sample_indices_inplace(&mut r, 1, 0), vec![]);
+        assert_eq!(sample_indices_inplace(&mut r, 1, 1), vec![0]);
+
+        assert_eq!(sample_indices_cache(&mut r, 0, 0), vec![]);
+        assert_eq!(sample_indices_cache(&mut r, 1, 0), vec![]);
+        assert_eq!(sample_indices_cache(&mut r, 1, 1), vec![0]);
+
+        // Make sure lucky 777's aren't lucky
+        let slice = &[42, 777];
+        let mut num_42 = 0;
+        let total = 1000;
+        for _ in 0..total {
+            let v = sample_slice(&mut r, slice, 1);
+            assert_eq!(v.len(), 1);
+            let v = v[0];
+            assert!(v == 42 || v == 777);
+            if v == 42 { num_42 += 1; }
+        }
+        // Both bounds must hold at once; joined with `||` the check could never fail.
+        // For 1000 fair draws, 0.4 and 0.6 each sit over six standard deviations from 0.5.
+        let ratio_42 = num_42 as f64 / total as f64;
+        assert!(0.4 <= ratio_42 && ratio_42 <= 0.6, "{}", ratio_42);
+    }
+
+    #[test]
+    fn test_sample_slice() {
+        // Each strategy below is fed an identically seeded XorShiftRng, so every one of
+        // them has to agree element for element.
+        let seeded = XorShiftRng::from_seed;
+
+        let max_range = 100;
+        let mut r = thread_rng();
+
+        for length in 1usize..max_range {
+            // drawn per length, so each iteration exercises a different sample size
+            let amount = r.gen_range(0, length);
+            let seed: [u32; 4] = [
+                r.next_u32(), r.next_u32(), r.next_u32(), r.next_u32()
+            ];
+
+            println!("Selecting indices: len={}, amount={}, seed={:?}", length, amount, seed);
+
+            // the two index strategies must produce exactly the same sequence
+            let via_inplace = sample_indices_inplace(&mut seeded(seed), length, amount);
+            let via_cache = sample_indices_cache(&mut seeded(seed), length, amount);
+            assert_eq!(via_inplace, via_cache);
+
+            // the public entry point: right count, in bounds, same sequence again
+            let via_public = sample_indices(&mut seeded(seed), length, amount);
+            assert_eq!(via_public.len(), amount);
+            assert!(via_public.iter().all(|&idx| idx < length));
+            assert_eq!(via_public, via_inplace);
+
+            // on the identity slice `0..length` a sampled value equals its own index,
+            // so the slice samplers must reproduce `via_public`
+            let identity: Vec<usize> = (0..length).collect();
+
+            // cloning variant
+            let cloned = sample_slice(&mut seeded(seed), &identity, amount);
+            assert_eq!(cloned, via_public);
+
+            // borrowing variant
+            let borrowed = sample_slice_ref(&mut seeded(seed), &identity, amount);
+            let expected = via_public.iter().collect::<Vec<_>>();
+            assert_eq!(borrowed, expected);
+        }
+    }
+}
diff --git a/utils/ziggurat_tables.py b/utils/ziggurat_tables.py
new file mode 100755
index 00000000..762f9565
--- /dev/null
+++ b/utils/ziggurat_tables.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+#
+# Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+# This creates the tables used for distributions implemented using the
+# ziggurat algorithm in `rand::distributions;`. They are
+# (basically) the tables as used in the ZIGNOR variant (Doornik 2005).
+# They are changed rarely, so the generated file should be checked in
+# to git.
+#
+# It creates 2 tables: X as in the paper, and F which is f(x_i). The
+# latter is just a cache of values, which is not done in that paper
+# (but is done in other variants). An F_DIFF table, which would be
+# f(x_i) - f(x_{i-1}), is not emitted. Note that the adZigR table is
+# unnecessary because of algebra.
+#
+# It is designed to be compatible with Python 2 and 3.
+
+from math import exp, sqrt, log, floor
+import random
+
+# The order should match the return value of `tables`
+TABLE_NAMES = ['X', 'F']
+
+# The actual length of the table is 1 more, to stop
+# index-out-of-bounds errors. This should match the bitwise operation
+# to find `i` in `ziggurat` in `libstd/rand/mod.rs`. Also the *_R and
+# *_V constants below depend on this value.
+TABLE_LEN = 256
+
+# Generalisation of `zigNorInit` from Doornik2005 to any distribution.
+# r = dR, v = dV, f = probability density function, f_inv = inverse
+# of f.
+def tables(r, v, f, f_inv):
+    # Returns (xvec, fvec), each TABLE_LEN+1 long, with fvec[i] = f(xvec[i]).
+    # The final xvec slot is never assigned by the loop and so stays 0.
+    xvec = [0]*(TABLE_LEN+1)
+
+    xvec[0] = v / f(r)
+    xvec[1] = r
+
+    # each further x_i follows from the one before it
+    for i in range(2, TABLE_LEN):
+        prev = xvec[i-1]
+        xvec[i] = f_inv(v / prev + f(prev))
+
+    # cache the f's
+    fvec = [f(x) for x in xvec]
+
+    return xvec, fvec
+
+# Distributions
+# N(0, 1): unnormalised density exp(-x^2/2) and its inverse for x >= 0
+def norm_f(x):
+    return exp(-x*x/2.0)
+def norm_f_inv(y):
+    return sqrt(-2.0*log(y))
+
+NORM_R = 3.6541528853610088
+NORM_V = 0.00492867323399
+
+NORM = tables(NORM_R, NORM_V,
+              norm_f, norm_f_inv)
+
+# Exp(1): density exp(-x) and its inverse
+def exp_f(x):
+    return exp(-x)
+def exp_f_inv(y):
+    return -log(y)
+
+EXP_R = 7.69711747013104972
+EXP_V = 0.0039496598225815571993
+
+EXP = tables(EXP_R, EXP_V,
+             exp_f, exp_f_inv)
+
+
+# Output the tables/constants/types
+
+def render_static(name, type, value):
+    # no space or newline after the `=`: callers put it at the start of `value`
+    return 'pub static %s: %s =%s;\n' % (name, type, value)
+
+# pub static `name`: [f64; `len(values)`] =
+#     [values[0], ..., values[3],
+#      values[4], ..., values[7],
+#      ... ];
+def render_table(name, values):
+    rows = []
+    # 4 values on each row
+    for i in range(0, len(values), 4):
+        rows.append(', '.join('%.18f' % f for f in values[i:i+4]))
+
+    rendered = '\n    [%s]' % ',\n     '.join(rows)
+    # Rust spells a fixed-size array type `[T; N]`; the old `[T, .. N]` form is rejected
+    return render_static(name, '[f64; %d]' % len(values), rendered)
+
+
+with open('ziggurat_tables.rs', 'w') as f:
+    f.write('''// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Tables for distributions which are sampled using the ziggurat
+// algorithm. Autogenerated by `ziggurat_tables.py`.
+
+pub type ZigTable = &\'static [f64; %d];
+'''  % (TABLE_LEN + 1))
+    for name, tables, r in [('NORM', NORM, NORM_R),
+                            ('EXP', EXP, EXP_R)]:
+        f.write(render_static('ZIG_%s_R' % name, 'f64', ' %.18f' % r))
+        for (tabname, table) in zip(TABLE_NAMES, tables):
+            f.write(render_table('ZIG_%s_%s' % (name, tabname), table))