diff --git a/stdlib/Random/Project.toml b/stdlib/Random/Project.toml
index 5a9cc2dfc4cb7..d38b5eb30e947 100644
--- a/stdlib/Random/Project.toml
+++ b/stdlib/Random/Project.toml
@@ -2,9 +2,6 @@ name = "Random"
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 version = "1.11.0"
 
-[deps]
-SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl
index bd3df8e54f194..d02022d429e6a 100644
--- a/stdlib/Random/src/RNGs.jl
+++ b/stdlib/Random/src/RNGs.jl
@@ -102,80 +102,6 @@ end
 
 rng_native_52(::RandomDevice) = UInt64
 
-## SeedHasher
-
-"""
-    Random.SeedHasher(seed=nothing)
-
-Create a `Random.SeedHasher` RNG object, which generates random bytes with the help
-of a cryptographic hash function (SHA2), via calls to [`Random.hash_seed`](@ref).
-
-Given two seeds `s1` and `s2`, the random streams generated by
-`SeedHasher(s1)` and `SeedHasher(s2)` should be distinct if and only if
-`s1` and `s2` are distinct.
-
-This RNG is used by default in `Random.seed!(::AbstractRNG, seed::Any)`, such that
-RNGs usually need only to implement `seed!(rng, ::AbstractRNG)`.
-
-This is an internal type, subject to change.
-"""
-mutable struct SeedHasher <: AbstractRNG
-    bytes::Vector{UInt8}
-    idx::Int
-    cnt::Int64
-
-    SeedHasher(seed=nothing) = seed!(new(), seed)
-end
-
-seed!(rng::SeedHasher, seeder::AbstractRNG) = seed!(rng, rand(seeder, UInt64, 4))
-seed!(rng::SeedHasher, ::Nothing) = seed!(rng, RandomDevice())
-
-function seed!(rng::SeedHasher, seed)
-    # typically, no more than 256 bits will be needed, so use
-    # SHA2_256 because it's faster
-    ctx = SHA2_256_CTX()
-    hash_seed(seed, ctx)
-    rng.bytes = SHA.digest!(ctx)::Vector{UInt8}
-    rng.idx = 0
-    rng.cnt = 0
-    rng
-end
-
-@noinline function rehash!(rng::SeedHasher)
-    # more random bytes are necessary, from now on use SHA2_512 to generate
-    # more bytes at once
-    ctx = SHA2_512_CTX()
-    SHA.update!(ctx, rng.bytes)
-    # also hash the counter, just for the extremely unlikely case where the hash of
-    # rng.bytes is equal to rng.bytes (i.e. rng.bytes is a "fixed point"), or more generally
-    # if there is a small cycle
-    SHA.update!(ctx, reinterpret(NTuple{8, UInt8}, rng.cnt += 1))
-    rng.bytes = SHA.digest!(ctx)
-    rng.idx = 0
-    rng
-end
-
-function rand(rng::SeedHasher, ::SamplerType{UInt8})
-    rng.idx < length(rng.bytes) || rehash!(rng)
-    rng.bytes[rng.idx += 1]
-end
-
-for TT = Base.BitInteger_types
-    TT === UInt8 && continue
-    @eval function rand(rng::SeedHasher, ::SamplerType{$TT})
-        xx = zero($TT)
-        for ii = 0:sizeof($TT)-1
-            xx |= (rand(rng, UInt8) % $TT) << (8 * ii)
-        end
-        xx
-    end
-end
-
-rand(rng::SeedHasher, ::SamplerType{Bool}) = rand(rng, UInt8) % Bool
-
-rng_native_52(::SeedHasher) = UInt64
-
-
 ## seeding
 
 """
@@ -244,12 +170,12 @@ function seed!(rng::AbstractRNG, seed::Any=nothing)
 end
 
 
-### hash_seed()
+### hashseed!()
 
 """
-    Random.hash_seed(seed, ctx::SHA_CTX)::AbstractVector{UInt8}
+    Random.hashseed!(ctx::SeedHasher, seed)
 
-Update `ctx` via `SHA.update!` with the content of `seed`.
+Update `ctx` via `ingest!` with the content of `seed`.
 
 This function is used by the [`SeedHasher`](@ref) RNG to produce
 random bytes.
@@ -257,15 +183,15 @@ random bytes.
 `Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`,
 but modules can extend this function for types they own.
 
-`hash_seed` is "injective" : for two equivalent context objects `cn` and `cm`,
+`hashseed!` is "injective" : for two equivalent context objects `cn` and `cm`,
 if `n != m`, then `cn` and `cm` will be distinct after calling
-`hash_seed(n, cn); hash_seed(m, cm)`.
+`hashseed!(cn, n); hashseed!(cm, m)`.
 Moreover, if `n == m`, then `cn` and `cm` remain equivalent after calling
-`hash_seed(n, cn); hash_seed(m, cm)`.
+`hashseed!(cn, n); hashseed!(cm, m)`.
 """
-function hash_seed end
+function hashseed! end
 
-function hash_seed(seed::Integer, ctx::SHA_CTX)
+function hashseed!(ctx::SeedHasher, seed::Integer)
     neg = signbit(seed)
     if neg
         seed = ~seed
@@ -274,35 +200,35 @@ function hash_seed(seed::Integer, ctx::SHA_CTX)
     while true
         word = (seed % UInt32) & 0xffffffff
         seed >>>= 32
-        SHA.update!(ctx, reinterpret(NTuple{4, UInt8}, word))
+        ingest!(ctx, reinterpret(NTuple{4, UInt8}, word))
         iszero(seed) && break
     end
     # make sure the hash of negative numbers is different from the hash of positive numbers
-    neg && SHA.update!(ctx, (0x01,))
+    neg && ingest!(ctx, (0x01,))
     nothing
 end
 
-function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}}, ctx::SHA_CTX)
+function hashseed!(ctx::SeedHasher, seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}})
     for xx in seed
-        SHA.update!(ctx, reinterpret(NTuple{8, UInt8}, UInt64(xx)))
+        ingest!(ctx, reinterpret(NTuple{8, UInt8}, UInt64(xx)))
     end
-    # discriminate from hash_seed(::Integer)
-    SHA.update!(ctx, (0x10,))
+    # discriminate from hashseed!(ctx, ::Integer)
+    ingest!(ctx, (0x10,))
 end
 
-function hash_seed(str::AbstractString, ctx::SHA_CTX)
+function hashseed!(ctx::SeedHasher, str::AbstractString)
     # convert to String such that `codeunits(str)` below is consistent between equal
     # strings of different types
     str = String(str)
-    SHA.update!(ctx, codeunits(str))
-    # signature for strings: so far, all hash_seed functions end-up hashing a multiple
+    ingest!(ctx, codeunits(str))
+    # signature for strings: so far, all hashseed! functions end-up hashing a multiple
     # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many
     # bytes as necessary to have a total number of hashed bytes equal to 0 mod 4 (padding),
     # and then hash the signature 0x05; in order for strings of different lengths to have
     # different hashes, padding bytes are set equal to the number of padding bytes
     pad = 4 - mod(ncodeunits(str), 4)
     for _=1:pad
-        SHA.update!(ctx, (pad % UInt8,))
+        ingest!(ctx, (pad % UInt8,))
     end
-    SHA.update!(ctx, (0x05,))
+    ingest!(ctx, (0x05,))
 end
diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl
index cc598094b1956..0dc4a98459fd5 100644
--- a/stdlib/Random/src/Random.jl
+++ b/stdlib/Random/src/Random.jl
@@ -13,7 +13,6 @@ include("DSFMT.jl")
 using .DSFMT
 using Base.GMP.MPZ
 using Base.GMP: Limb
-using SHA: SHA, SHA2_256_CTX, SHA2_512_CTX, SHA_CTX
 
 using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing,
             _throw_argerror
@@ -418,6 +417,7 @@ rand!
 
 
 include("Xoshiro.jl")
+include("SeedHasher.jl")
 include("RNGs.jl")
 include("MersenneTwister.jl")
 include("generation.jl")
diff --git a/stdlib/Random/src/SeedHasher.jl b/stdlib/Random/src/SeedHasher.jl
new file mode 100644
index 0000000000000..b0e108544406a
--- /dev/null
+++ b/stdlib/Random/src/SeedHasher.jl
@@ -0,0 +1,178 @@
+## SeedHasher
+
+#=
+`SeedHasher` implements the seed-mixing algorithm designed by M. E. O'Neill
+as an alternative to `std::seed_seq`, intended to produce, from a
+user-provided seed, high-quality initialization data for RNGs.
+Cf. https://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html
+
+This implementation is derived from the `seed_seq_fe` C++ reference version:
+https://gist.github.com/imneme/540829265469e673d045 (MIT license).
+
+The original algorithm uses a fixed-size entropy buffer (128 or 256 bits).
+`SeedHasher` adjusts the buffer size dynamically, roughly to the size of the
+input seed, up to a maximum of 256 bits.
+=#
+
+const _SH_MAX_ENTROPY = 8 # number of stored UInt32 words of entropy
+const _SH_BUFSIZE = 32 # capacity of the `mixer` buffer, in UInt32 words
+@assert _SH_BUFSIZE > _SH_MAX_ENTROPY # the diff should be at least 8 for decent performance
+const _SH_XSHIFT::UInt32 = UInt32(sizeof(UInt32) * 4)
+
+"""
+    Random.SeedHasher(seed=nothing)
+
+Create a `Random.SeedHasher` RNG, which produces random bytes derived from the
+entropy extracted from `seed` via calls to [`Random.hashseed!`](@ref).
+
+Given two seeds `s1` and `s2`, the random streams generated by
+`SeedHasher(s1)` and `SeedHasher(s2)` should be distinct if and only if
+`s1` and `s2` are distinct.
+
+`SeedHasher` is used by default in `Random.seed!(::AbstractRNG, seed::Any)`,
+so RNGs typically need only implement `seed!(rng, ::AbstractRNG)`.
+
+!!! warning
+    `SeedHasher` is intended only for producing initialization data for other RNGs.
+    It is *not* suitable for use as a general-purpose RNG.
+
+This is an internal type, subject to change.
+"""
+mutable struct SeedHasher <: AbstractRNG
+    const mixer::Memory{UInt32}
+    len::Int # size of the entropy store
+    idx::Int # byte index while ingesting, UInt32 index while generating
+    hash_const::UInt32 # evolving constant folded into every hashed word
+
+    SeedHasher(::UndefInitializer) =
+        new(Memory{UInt32}(undef, _SH_BUFSIZE), 0, 0, UInt32(0))
+end
+
+SeedHasher(seed=nothing) = seed!(SeedHasher(undef), seed)
+
+seed!(rng::SeedHasher, ::Nothing) = seed!(rng, RandomDevice())
+
+function seed!(rng::SeedHasher, seeder::AbstractRNG)
+    # no seed mixing necessary, directly randomize `mixer`
+    rand!(seeder, view(rng.mixer, 1:_SH_MAX_ENTROPY))
+    rng.len = _SH_MAX_ENTROPY
+    rng.idx = 0
+    rng.hash_const = 0x8b51f9dd # INIT_B
+    rng
+end
+
+function seed!(rng::SeedHasher, seed)
+    rng.len = 0
+    rng.idx = 0
+    rng.hash_const = 0x43b0d7e5 # INIT_A
+    hashseed!(rng, seed)
+    finalize!(rng)
+    if rng.len <= 2
+        # additional mixing (`stir()` in the C++ code)
+        rng.idx = rng.len << 2
+        rng.len = 0
+        rng.hash_const = 0x43b0d7e5 # this follows the C++ code, but might not be necessary?
+        mix_entropy!(rng)
+    end
+    rng.idx = 0
+    rng.hash_const = 0x8b51f9dd # INIT_B
+    rng
+end
+
+# During seed ingestion (entropy extraction), the seed is encoded as bytes
+# (via `hashseed!`) and written verbatim into `rng.mixer`, starting at the
+# byte index `rng.idx + 1`. Once the buffer is filled for the first time,
+# this initial block is mixed to produce the initial state of the entropy
+# store, which occupies the first `_SH_MAX_ENTROPY` UInt32 words of
+# `rng.mixer`. The remaining portion of the buffer stays available to ingest
+# further bytes from the seed.
+function ingest!(rng::SeedHasher,
+                 xs::Union{AbstractArray{UInt8}, NTuple{N, UInt8}}) where N
+    mixer8 = reinterpret(UInt8, rng.mixer)
+    xsi = 0 # number of consumed bytes from xs
+    while xsi != length(xs)
+        if rng.idx == length(mixer8)
+            mix_entropy!(rng)
+            # now, the upper side of mixer8 is free
+        end
+        (; idx) = rng
+        tocopy = min(length(xs) - xsi, length(mixer8) - idx)
+        for ii = 1:tocopy
+            @inbounds mixer8[idx + ii] = xs[xsi + ii]
+        end
+        xsi += tocopy
+        rng.idx += tocopy
+    end
+    rng
+end
+
+function finalize!(rng::SeedHasher)
+    mixer8 = reinterpret(UInt8, rng.mixer)
+    while 0 != (rng.idx & 0x3) # zero-pad up to the next UInt32 boundary
+        mixer8[rng.idx += 1] = 0
+    end
+    mix_entropy!(rng)
+end
+
+function mix_entropy!(rng::SeedHasher)
+    function hash(value::UInt32)
+        value ⊻= rng.hash_const
+        rng.hash_const *= 0x931e8875
+        value *= rng.hash_const
+        value ⊻= value >> _SH_XSHIFT
+        value
+    end
+
+    function mix(x::UInt32, y::UInt32)
+        result::UInt32 = 0xca01f9dd * x - 0x4973f715 * y
+        result ⊻= result >> _SH_XSHIFT
+        result
+    end
+
+    (; mixer, len, idx) = rng
+    @assert 0 == (idx & 0x3)
+    idx >>= 2 # number of `UInt32` values written into mixer
+
+    if len == 0 # nothing has been mixed in so far
+        len = rng.len = min(_SH_MAX_ENTROPY, idx)
+        for ii = 1:len
+            @inbounds mixer[ii] = hash(mixer[ii])
+        end
+        for isrc = 1:len, idst = 1:len
+            if isrc != idst
+                @inbounds mixer[idst] = mix(mixer[idst], hash(mixer[isrc]))
+            end
+        end
+    end
+
+    for ii = len+1:idx
+        for idst = 1:len
+            @inbounds mixer[idst] = mix(mixer[idst], hash(mixer[ii]))
+        end
+    end
+
+    rng.idx = len << 2 # byte offset just past the entropy store
+    rng
+end
+
+### generation
+
+function rand(rng::SeedHasher, ::SamplerType{UInt32})
+    (; mixer, len, idx, hash_const) = rng
+    dataval = @inbounds mixer[idx += 1]
+    dataval ⊻= hash_const
+    hash_const *= 0x58f38ded # MULT_B
+    dataval *= hash_const
+    dataval ⊻= dataval >> _SH_XSHIFT
+    rng.idx = idx == len ? 0 : idx # wrap around at the end of the entropy store
+    rng.hash_const = hash_const
+    dataval
+end
+
+rand(rng::SeedHasher, T::SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32)) =
+    rand(rng, UInt32) % T[]
+rand(rng::SeedHasher, T::SamplerUnion(Int64, UInt64)) =
+    (rand(rng, UInt32) % T[]) << 32 ⊻ rand(rng, UInt32) % T[]
+rand(rng::SeedHasher, T::SamplerUnion(Int128, UInt128)) = rand_generic(rng, T[])
+
+rng_native_52(::SeedHasher) = UInt64
diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl
index 55cbf02f6ad9d..4a8fe3509db9a 100644
--- a/stdlib/Random/test/runtests.jl
+++ b/stdlib/Random/test/runtests.jl
@@ -13,7 +13,6 @@ using Random.DSFMT
 
 using Random: default_rng, Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I
 using Random: jump_128, jump_192, jump_128!, jump_192!, SeedHasher
-import SHA
 import Future # randjump
 
 function test_uniform(xs::AbstractArray{T}) where {T<:AbstractFloat}
@@ -1246,11 +1245,15 @@ end
 end
 
 
-@testset "seed! and hash_seed" begin
+@testset "seed! and hashseed!" begin
     function hash_seed(seed)
-        ctx = SHA.SHA2_256_CTX()
-        Random.hash_seed(seed, ctx)
-        bytes2hex(SHA.digest!(ctx))
+        # prepare SeedHasher like in seed!(::SeedHasher, seed)
+        rng = SeedHasher(undef)
+        rng.len = 0
+        rng.idx = 0
+        rng.hash_const = 0x43b0d7e5
+        Random.hashseed!(rng, seed)
+        bytes2hex(view(reinterpret(UInt8, rng.mixer), 1:rng.idx))
     end
 
     # Test that: