Future custom rng#98
Conversation
This is for usage together with the new RNG API in future.
For now only for Xoshiro256++. For usage with the new RNG API in future.
These are (for now) meant for usage together with the new RNG API in future.
|
Initial attempt at supporting the RNG API in library(future)
future:::parallel_rng_kind(
kind = "L'Ecuyer-CMRG",
set_kind = base::RNGkind,
next_stream = parallel::nextRNGStream,
next_substream = parallel::nextRNGSubStream,
is_seed = future:::is_lecyer_cmrg_seed,
as_seed = future:::as_lecyer_cmrg_seed
)
#> $kind
#> [1] "L'Ecuyer-CMRG"
#>
#> $set_kind
#> function (kind = NULL, normal.kind = NULL, sample.kind = NULL)
#> {
#> kinds <- c("Wichmann-Hill", "Marsaglia-Multicarry", "Super-Duper",
#> "Mersenne-Twister", "Knuth-TAOCP", "user-supplied", "Knuth-TAOCP-2002",
#> "L'Ecuyer-CMRG", "default")
#> n.kinds <- c("Buggy Kinderman-Ramage", "Ahrens-Dieter", "Box-Muller",
#> "user-supplied", "Inversion", "Kinderman-Ramage", "default")
#> s.kinds <- c("Rounding", "Rejection", "default")
#> do.set <- length(kind) > 0L
#> if (do.set) {
#> if (!is.character(kind) || length(kind) > 1L)
#> stop("'kind' must be a character string (RNG to be used).")
#> if (is.na(i.knd <- pmatch(kind, kinds) - 1L))
#> stop(gettextf("'%s' is not a valid abbreviation of an RNG",
#> kind), domain = NA)
#> if (i.knd == length(kinds) - 1L)
#> i.knd <- -1L
#> }
#> else i.knd <- NULL
#> if (!is.null(normal.kind)) {
#> if (!is.character(normal.kind) || length(normal.kind) !=
#> 1L)
#> stop(gettextf("'%s' must be a character string",
#> "normal.kind"), domain = NA)
#> normal.kind <- pmatch(normal.kind, n.kinds) - 1L
#> if (is.na(normal.kind))
#> stop(gettextf("'%s' is not a valid choice", normal.kind),
#> domain = NA)
#> if (normal.kind == 0L)
#> warning("buggy version of Kinderman-Ramage generator used",
#> domain = NA)
#> if (normal.kind == length(n.kinds) - 1L)
#> normal.kind <- -1L
#> }
#> if (!is.null(sample.kind)) {
#> if (!is.character(sample.kind) || length(sample.kind) !=
#> 1L)
#> stop(gettextf("'%s' must be a character string",
#> "sample.kind"), domain = NA)
#> sample.kind <- pmatch(sample.kind, s.kinds) - 1L
#> if (is.na(sample.kind))
#> stop(gettextf("'%s' is not a valid choice", sample.kind),
#> domain = NA)
#> if (sample.kind == 0L)
#> warning("non-uniform 'Rounding' sampler used", domain = NA)
#> if (sample.kind == length(s.kinds) - 1L)
#> sample.kind <- -1L
#> }
#> r <- 1L + .Internal(RNGkind(i.knd, normal.kind, sample.kind))
#> r <- c(kinds[r[1L]], n.kinds[r[2L]], s.kinds[r[3L]])
#> if (do.set || !is.null(normal.kind) || !is.null(sample.kind))
#> invisible(r)
#> else r
#> }
#> <bytecode: 0x58da23eee168>
#> <environment: namespace:base>
#>
#> $next_stream
#> function (seed)
#> {
#> if (!is.integer(seed) || seed[1L]%%100L != 7L)
#> stop(gettextf("invalid value of %s", "'seed'"), domain = NA)
#> .Call(C_nextStream, seed)
#> }
#> <bytecode: 0x58da24e7e500>
#> <environment: namespace:parallel>
#>
#> $next_substream
#> function (seed)
#> {
#> if (!is.integer(seed) || seed[1L]%%100L != 7L)
#> stop(gettextf("invalid value of %s", "'seed'"), domain = NA)
#> .Call(C_nextSubStream, seed)
#> }
#> <bytecode: 0x58da24e83140>
#> <environment: namespace:parallel>
#>
#> $is_seed
#> function (seed)
#> {
#> is.numeric(seed) && length(seed) == 7L && all(is.finite(seed)) &&
#> (seed[1]%%10000L == 407L)
#> }
#> <bytecode: 0x58da24e821f0>
#> <environment: namespace:future>
#>
#> $as_seed
#> function (seed)
#> {
#> if (is.logical(seed)) {
#> stop_if_not(length(seed) == 1L)
#> if (!is.na(seed) && !seed) {
#> stopf("Argument 'seed' must be TRUE if logical: %s",
#> seed)
#> }
#> oseed <- get_random_seed()
#> if (!is.na(seed) && seed) {
#> if (is_lecyer_cmrg_seed(oseed))
#> return(oseed)
#> }
#> okind <- RNGkind("L'Ecuyer-CMRG")[1]
#> on.exit(set_random_seed(oseed, kind = okind, set_kind = RNGkind),
#> add = TRUE)
#> return(get_random_seed())
#> }
#> stop_if_not(is.numeric(seed), all(is.finite(seed)))
#> seed <- as.integer(seed)
#> if (is_lecyer_cmrg_seed(seed)) {
#> return(seed)
#> }
#> if (length(seed) == 1L) {
#> oseed <- get_random_seed()
#> okind <- RNGkind("L'Ecuyer-CMRG")[1]
#> on.exit(set_random_seed(oseed, kind = okind, set_kind = RNGkind),
#> add = TRUE)
#> set.seed(seed)
#> return(get_random_seed())
#> }
#> stopf("Argument 'seed' must be L'Ecuyer-CMRG RNG seed as returned by parallel::nextRNGStream() or an single integer: %s",
#> capture.output(str(seed)))
#> }
#> <bytecode: 0x58da24e84f10>
#> <environment: namespace:future>
future:::make_rng_seeds(3, seed = TRUE)
#> [[1]]
#> [1] 10407 -1689160120 222868402 -2053856020 -1630003168 -1354712463
#> [7] -1964658922
#>
#> [[2]]
#> [1] 10407 1991458783 1776575577 -1166848998 20239795 -1044128190
#> [7] 1102892864
#>
#> [[3]]
#> [1] 10407 868387449 1695576762 355151608 2069567958 862639667 124732210
system.time(future:::make_rng_seeds(1e5, seed = TRUE))
#> user system elapsed
#> 0.306 0.015 0.322
library(dqrng)
future:::parallel_rng_kind(
kind = "Xoshiro256++",
set_kind = dqrng::dqRNGkind,
next_stream = dqrng:::next_stream,
next_substream = dqrng:::next_substream,
is_seed = dqrng:::is_xoshiro256pp_seed,
as_seed = dqrng:::as_xoshiro256pp_seed
)
#> $kind
#> [1] "Xoshiro256++"
#>
#> $set_kind
#> function (kind, normal_kind = "ignored")
#> {
#> invisible(.Call(`_dqrng_dqRNGkind`, kind, normal_kind))
#> }
#> <bytecode: 0x58da27b66ba8>
#> <environment: namespace:dqrng>
#>
#> $next_stream
#> function (state)
#> {
#> .Call(`_dqrng_next_stream`, state)
#> }
#> <bytecode: 0x58da27b69fc8>
#> <environment: namespace:dqrng>
#>
#> $next_substream
#> function (state)
#> {
#> .Call(`_dqrng_next_substream`, state)
#> }
#> <bytecode: 0x58da27b69698>
#> <environment: namespace:dqrng>
#>
#> $is_seed
#> function (seed)
#> {
#> is.character(seed) && length(seed) == 5L && seed[1] == "xoshiro256++" &&
#> grep("^[0-9]+$", seed, invert = TRUE) == 1
#> }
#> <bytecode: 0x58da27b68d68>
#> <environment: namespace:dqrng>
#>
#> $as_seed
#> function (seed)
#> {
#> if (is.logical(seed)) {
#> if (length(seed) != 1L && !is.na(seed) && !seed) {
#> stop("Argument 'seed' must be TRUE if logical: %s",
#> seed)
#> }
#> oseed <- dqrng_get_state()
#> if (!is.na(seed) && seed) {
#> if (is_xoshiro256pp_seed(oseed))
#> return(oseed)
#> }
#> on.exit(dqrng_set_state(oseed), add = TRUE)
#> dqRNGkind("Xoshiro256++")
#> return(dqrng_get_state())
#> }
#> if (is_xoshiro256pp_seed(seed)) {
#> return(seed)
#> }
#> if (is.numeric(seed) && all(is.finite(seed)) && length(seed) <=
#> 2) {
#> seed <- as.integer(seed)
#> oseed <- dqrng_get_state()
#> on.exit(dqrng_set_state(oseed), add = TRUE)
#> dqRNGkind("Xoshiro256++")
#> dqset.seed(seed)
#> return(dqrng_get_state())
#> }
#> stop("Argument 'seed' must be TRUE, Xoshiro256++ RNG state as returned by dqrng_get_state() or an integer vector with length <= 2")
#> }
#> <bytecode: 0x58da27b6bc48>
#> <environment: namespace:dqrng>
future:::make_rng_seeds(3, seed = TRUE)
#> [[1]]
#> [1] "xoshiro256++" "11366403006693953604" "5776312522536079727"
#> [4] "9618926254244045008" "5165000284952699762"
#>
#> [[2]]
#> [1] "xoshiro256++" "16669368997197376375" "18351587136123902063"
#> [4] "4810568700463215920" "6616614669550573422"
#>
#> [[3]]
#> [1] "xoshiro256++" "9919774969775135560" "2550819741878577113"
#> [4] "1962113790411544427" "2910686945654831949"
system.time(future:::make_rng_seeds(1e5, seed = TRUE))
#> user system elapsed
#> 2.733 0.042 2.806Created on 2025-06-29 with reprex v2.1.1 @HenrikBengtsson Do you have a test case that one could use for something more realistic? |
|
This is how benchmark results would change (along with a 95% confidence interval in relative change) if bacd1f7 is merged into main:
|
|
This is how benchmark results would change (along with a 95% confidence interval in relative change) if 59d7eed is merged into main:
|
|
This is awesome!
I don't have anything generic yet, but I have "rng" unit tests in future, future.apply, doFuture, ... They're under library(dqrng)
void <- future:::parallel_rng_kind(
kind = "Xoshiro256++",
set_kind = dqrng::dqRNGkind,
next_stream = dqrng:::next_stream,
next_substream = dqrng:::next_substream,
is_seed = dqrng:::is_xoshiro256pp_seed,
as_seed = dqrng:::as_xoshiro256pp_seed
)
## Give an error, bc assuming Ecuyer-CMRG
future:::testme("rng")
future.apply:::testme("rng")
## Ignored and give warnings, bc assumes integer `seed`
doFuture:::testme("foreach_dofuture,rng")
## Works
future:::testme("rng_utils")
future.callr:::testme("rng")
future.mirai:::testme("rng")I'll adjust these tests so they're invariant or agile to I'll probably also end up adding something to future.tests, but that's later on. |
|
I got some strange messages from library(future)
plan(multisession)
library(future.apply)
void <- future:::parallel_rng_kind(
kind = "L'Ecuyer-CMRG",
set_kind = base::RNGkind,
next_stream = parallel::nextRNGStream,
next_substream = parallel::nextRNGSubStream,
is_seed = future:::is_lecyer_cmrg_seed,
as_seed = future:::as_lecyer_cmrg_seed
)
# identical results
future_sapply(1:8, \(x){sum(runif(1e6))}, future.seed = 42)
#> [1] 499815.2 499821.5 499642.8 499897.5 499980.7 499645.6 499786.0 500107.4
future_sapply(1:8, \(x){sum(runif(1e6))}, future.seed = 42)
#> [1] 499815.2 499821.5 499642.8 499897.5 499980.7 499645.6 499786.0 500107.4
library(dqrng)
void <- future:::parallel_rng_kind(
kind = "Xoshiro256++",
set_kind = dqrng::dqRNGkind,
next_stream = dqrng:::next_stream,
next_substream = dqrng:::next_substream,
is_seed = dqrng:::is_xoshiro256pp_seed,
as_seed = dqrng:::as_xoshiro256pp_seed
)
# *not* identical results
future_sapply(1:8, \(x){sum(dqrunif(1e6))}, future.seed = 42)
#> [1] 499899.5 499831.8 500003.0 500008.3 499772.2 499348.9 499966.8 499665.0
future_sapply(1:8, \(x){sum(dqrunif(1e6))}, future.seed = 42)
#> [1] 500143.0 500230.2 500017.7 499620.6 499994.3 499810.3 500185.4 499568.3Created on 2025-06-30 with reprex v2.1.1 |
|
And executing the base variant with the So it looks like you are assigning the generated seeds directly to |
Yes. It has slowly started to sink in what you've told me about dqRNG is doing its own thing. So, at a minimum, it sounds like BTW, for parallel processing, we also need to initiate each parallel worker using |
Yes, that sounds right. Since the other functions use
I am not sure. I am still trying to wrap my head around the
That is a very good idea. Maybe it would make sense to have a general |
|
I was curious to have a closer look at this:
Basically I have build a small package that takes some of my ideas for "how I would implement an RNG package today" and puts them into practice: https://rstub.codeberg.page/xoshiro/ At the moment it is not really fast, since there is a lot of back and forth going on with user defined RNGs. For good performance one needs special purpose functions. However, it does make use of library(xoshiro)
library(future.apply)
#> Loading required package: future
plan(multisession)
void <- future:::parallel_rng_kind(
kind = "user",
set_kind = base::RNGkind,
next_stream = xoshiro:::next_stream,
next_substream = xoshiro:::next_substream,
is_seed = xoshiro:::is_xoshiro256pp_seed,
as_seed = xoshiro:::as_xoshiro256pp_seed
)
# identical results
future_sapply(1:8, \(x){sum(runif(1e6))}, future.packages = "xoshiro", future.seed = 42)
#> [1] 500076.5 500328.9 500158.6 499847.2 500373.7 499792.5 499822.4 499186.2
future_sapply(1:8, \(x){sum(runif(1e6))}, future.packages = "xoshiro", future.seed = 42)
#> [1] 500076.5 500328.9 500158.6 499847.2 500373.7 499792.5 499822.4 499186.2Note that I think this only works because the RNG gets automatically registered as a user defined RNG upon loading the package. My |
fixes #95