Generate All Combinations, of All Lengths, in R, from a Vector

R: all combinations of all lengths from a vector of elements each with 2 conditions

If you still wanted to keep the NA values in there, then just think of it as having a different value than "+" or "-", you just also have the NA value. You could do something like

markers <- LETTERS[1:5]

test <- expand.grid(lapply(seq(markers), function(x) c("+","-","NA")),stringsAsFactors=FALSE)

apply(test,1,function(x){paste0(ifelse(x=="NA", "NA", markers),ifelse(x=="NA","",x),collapse = "/")})

Generate all combinations of vector with consecutive occurrences is considered as single occurrence

Here's an approach using the very fast arrangements package for permutations. We calculate the permutations of integers corresponding to the unique elements of the input and then do some clever indexing to output the corresponding swaps. This is extremely fast on small examples and does pretty well on larger example - on my computer it took a little less than 7 seconds to generate the 10! = 3628800 swaps on input of size 30 with 10 unique elements. The results are conveniently returned in a list.

library(arrangements)

all_swaps = function(x) {
ux = unique(x)
xi = as.integer(factor(x))
perm = permutations(seq_along(ux))
apply(perm, MARGIN = 1, FUN = \(p) ux[p][xi], simplify = FALSE)
}

Test cases from the question:

# n = 2
all_swaps(c("a","a","a","b","b","b","a","a","b","b"))
# [[1]]
# [1] "a" "a" "a" "b" "b" "b" "a" "a" "b" "b"
#
# [[2]]
# [1] "b" "b" "b" "a" "a" "a" "b" "b" "a" "a"

## n = 3
all_swaps(c("a","a","a","b","b","b","c","c","c"))
# [[1]]
# [1] "a" "a" "a" "b" "b" "b" "c" "c" "c"
#
# [[2]]
# [1] "a" "a" "a" "c" "c" "c" "b" "b" "b"
#
# [[3]]
# [1] "b" "b" "b" "a" "a" "a" "c" "c" "c"
#
# [[4]]
# [1] "b" "b" "b" "c" "c" "c" "a" "a" "a"
#
# [[5]]
# [1] "c" "c" "c" "a" "a" "a" "b" "b" "b"
#
# [[6]]
# [1] "c" "c" "c" "b" "b" "b" "a" "a" "a"

A shorter demo with 3 unique elements in a "complex" case where the elements are not all consecutive:

all_swaps(c("a", "b", "b", "c", "b"))
# [[1]]
# [1] "a" "b" "b" "c" "b"
#
# [[2]]
# [1] "a" "c" "c" "b" "c"
#
# [[3]]
# [1] "b" "a" "a" "c" "a"
#
# [[4]]
# [1] "b" "c" "c" "a" "c"
#
# [[5]]
# [1] "c" "a" "a" "b" "a"
#
# [[6]]
# [1] "c" "b" "b" "a" "b"

A larger case:

# n = 10
set.seed(47)
start_t = Sys.time()
n10 = all_swaps(sample(letters[1:10], size = 30, replace = TRUE))
end_t = Sys.time()
end_t - start_t
# Time difference of 6.711215 secs
length(n10)
# [1] 3628800


Benchmarking

Benchmarking my answer with Maël's and ThomasIsCoding's, my method relying on the arrangements package is quick and memory efficient. ThomasIsCoding's answer can be improved by changing from pracma::perms to arrangements::permutations--the memory usage is especially improved--but my version still performs better. Maël's uses a lot of time and memory. I'll lead with results, code to reproduce is below.

## 5 Unique Elements
arrange(b5, desc(`itr/sec`))
# # A tibble: 4 × 13
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
# 1 GregorThomas 2.31ms 12.6ms 77.5 5.77KB 0 40 0 516ms
# 2 ThomasIsCodingArr(in5) 9.3ms 20.5ms 47.4 19.55KB 0 24 0 506ms
# 3 ThomasIsCoding(in5) 12.57ms 22.7ms 41.2 45.41KB 0 22 0 534ms
# 4 Mael 963.64ms 963.6ms 1.04 1.24MB 0 1 0 964ms
# # … with 4 more variables: result <list>, memory <list>, time <list>, gc <list>

## 9 Unique Elements - memory allocation is important
arrange(b9, desc(`itr/sec`))
# # A tibble: 2 × 13
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list>
# 1 GregorThomas 1.8s 1.8s 0.556 27.7MB 0 1 0 1.8s <NULL>
# 2 ThomasIsCoding(in9) 2.5s 2.5s 0.400 230.8MB 0.400 1 1 2.5s <NULL>
# # … with 3 more variables: memory <list>, time <list>, gc <list>

Benchmarking code:

## Functions
library(arrangements)
library(pracma)
ThomasIsCoding <- function(x) {
idx <- match(x, unique(x))
m <- asplit(matrix(unique(x)[perms(1:max(idx))], ncol = max(idx)), 1)
Map(`[`, m, list(idx))
}
ThomasIsCodingArr <- function(x) {
idx <- match(x, unique(x))
m <- asplit(matrix(unique(x)[permutations(1:max(idx))], ncol = max(idx)), 1)
Map(`[`, m, list(idx))
}
Mael <- function(vec){
uni <- unique(vec)
size <- length(uni)
pVec <- paste(uni, collapse = "")
grid <- expand.grid(rep(list(uni), size))
expanded <- grid[apply(grid, 1, function(x) length(unique(x))) == size,]
p <- unname(apply(expanded, 1, paste0, collapse = ""))

lapply(p, function(x) chartr(pVec, x, vec))
}
all_swaps = function(x) {
ux = unique(x)
xi = as.integer(factor(x))
perm = permutations(seq_along(ux))
apply(perm, MARGIN = 1, FUN = \(p) ux[p][xi], simplify = FALSE)
}

set.seed(47)
in5 = c(sample(letters[1:5], 5), sample(letters[1:5], 5, replace = TRUE))

b5 = bench::mark(
GregorThomas = all_swaps(in5),
Mael = Mael(in5),
ThomasIsCoding(in5),
ThomasIsCodingArr(in5),
check = FALSE
)

Unordered combinations of all lengths

You could apply a sequence the length of x over the m argument of the combn() function.

x <- c("red", "blue", "black")
do.call(c, lapply(seq_along(x), combn, x = x, simplify = FALSE))
# [[1]]
# [1] "red"
#
# [[2]]
# [1] "blue"
#
# [[3]]
# [1] "black"
#
# [[4]]
# [1] "red" "blue"
#
# [[5]]
# [1] "red" "black"
#
# [[6]]
# [1] "blue" "black"
#
# [[7]]
# [1] "red" "blue" "black"

If you prefer a matrix result, then you can apply stringi::stri_list2matrix() to the list above.

stringi::stri_list2matrix(
do.call(c, lapply(seq_along(x), combn, x = x, simplify = FALSE)),
byrow = TRUE
)
# [,1] [,2] [,3]
# [1,] "red" NA NA
# [2,] "blue" NA NA
# [3,] "black" NA NA
# [4,] "red" "blue" NA
# [5,] "red" "black" NA
# [6,] "blue" "black" NA
# [7,] "red" "blue" "black"

All combinations of all sizes?

If you prefer compact code

Map(combn, list(x), seq_along(x))
## [[1]]
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4

## [[2]]
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 1 1 2 2 3
## [2,] 2 3 4 3 4 4

## [[3]]
## [,1] [,2] [,3] [,4]
## [1,] 1 1 1 2
## [2,] 2 2 3 3
## [3,] 3 4 4 4

## [[4]]
## [,1]
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4

To avoid repetition, you'll have to deal with nested list but you can simplify the result using unlist

res <- Map(combn, list(x), seq_along(x), simplify = FALSE)
unlist(res, recursive = FALSE)
## [[1]]
## [1] 1

## [[2]]
## [1] 2

## [[3]]
## [1] 3

## [[4]]
## [1] 4

## [[5]]
## [1] 1 2

## [[6]]
## [1] 1 3

## [[7]]
## [1] 1 4

## [[8]]
## [1] 2 3

## [[9]]
## [1] 2 4

## [[10]]
## [1] 3 4

## [[11]]
## [1] 1 2 3

## [[12]]
## [1] 1 2 4

## [[13]]
## [1] 1 3 4

## [[14]]
## [1] 2 3 4

## [[15]]
## [1] 1 2 3 4

Combinations of vector with sub-vector length n

I can answer the whole question, but it will take a bit longer. This should give you the flavour of the answer.

The package combinat has a function called permn which gives you the all the permutations of a vector. You want this, but not quite. What you need is the permutations of all the blocks. So in your first example you have two blocks of length two, and in your second example you have three blocks of length three. If we look at the first, and think about ordering the blocks:

> library(combinat)
> numBlocks = 2
> permn(1:numBlocks)
[[1]]
[1] 1 2

[[2]]
[1] 2 1

So I hope you can see that the first permutation would take the blocks b1 = c(1,2), and b2 = c(3,4) and order them c(b1,b2), and the second would order them c(b2,b1).

Equally if you had three blocks, b1 = 1:3; b2 = 4:6; b3 = 7:9 then

permn(1:3)
[[1]]
[1] 1 2 3

[[2]]
[1] 1 3 2

[[3]]
[1] 3 1 2

[[4]]
[1] 3 2 1

[[5]]
[1] 2 3 1

[[6]]
[1] 2 1 3

gives you the ordering of these blocks. The more general solution is figuring out how to move the blocks around, but that isn't too hard.

Update: Using my multicool package. Note co-lexical ordering (coolex) isn't the order you'd come up with by yourself.

library(multicool)

combs = function(v, blockLength){
if(length(v) %% blockLength != 0){
stop("vector length must be divisible by blockLength")
}

numBlocks = length(v) / blockLength
blockWise = matrix(v, nc = blockLength, byrow = TRUE)

m = initMC(1:numBlocks)
Perms = allPerm(m)

t(apply(Perms, 1, function(p)as.vector(t(blockWise[p,]))))
}
> combs(1:4, 2)
[,1] [,2] [,3] [,4]
[1,] 3 4 1 2
[2,] 1 2 3 4
> combs(1:9, 3)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] 7 8 9 4 5 6 1 2 3
[2,] 1 2 3 7 8 9 4 5 6
[3,] 7 8 9 1 2 3 4 5 6
[4,] 4 5 6 7 8 9 1 2 3
[5,] 1 2 3 4 5 6 7 8 9
[6,] 4 5 6 1 2 3 7 8 9

Generate all unique combinations from a vector with repeating elements

Use combn() with lapply() should do the trick.

x <- c('red', 'blue', 'green', 'red', 'green', 'red')

lapply(1:3, function(y) combn(x, y))

# [[1]]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] "red" "blue" "green" "red" "green" "red"

# [[2]]
# [,1] [,2] [,3] [,4] [,5] [,6] ...
# [1,] "red" "red" "red" "red" "red" "blue" ...
# [2,] "blue" "green" "red" "green" "red" "green" ...

# [[3]]
# [,1] [,2] [,3] [,4] [,5] [,6] ...
# [1,] "red" "red" "red" "red" "red" "red" ...
# [2,] "blue" "blue" "blue" "blue" "green" "green" ...
# [3,] "green" "red" "green" "red" "red" "green" ...

All unique combinations

lapply(cc, function(y)
y[,!duplicated(apply(y, 2, paste, collapse="."))]
)

[[1]]
[1] "red" "blue" "green"

[[2]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] "red" "red" "red" "blue" "blue" "green" "green"
[2,] "blue" "green" "red" "green" "red" "red" "green"

[[3]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] ...
[1,] "red" "red" "red" "red" "red" "red" "blue" ...
[2,] "blue" "blue" "green" "green" "red" "red" "green" ...
[3,] "green" "red" "red" "green" "green" "red" "red" ...

Although strictly speaking those aren't all unique combinations, as some of them are permutations of each other.

Properly unique combinations

lapply(cc, function(y)
y[,!duplicated(apply(y, 2, function(z) paste(sort(z), collapse=".")))]
)

# [[1]]
# [1] "red" "blue" "green"

# [[2]]
# [,1] [,2] [,3] [,4] [,5]
# [1,] "red" "red" "red" "blue" "green"
# [2,] "blue" "green" "red" "green" "green"

# [[3]]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] "red" "red" "red" "red" "red" "blue"
# [2,] "blue" "blue" "green" "green" "red" "green"
# [3,] "green" "red" "red" "green" "red" "green"

Find all combinations between two vectors of different length

A small change to your suggestion solves the problem:

#Assign same, constant index so the merge function will fill
x_all$index <- 1
y_all$index <- 1

#Merge to get all information
x_y_all <- merge(x_all, y_all, by = "index")

#Delete the index
x_y_all$index <- NULL

Generate list of all possible combinations of elements of vector

You're looking for expand.grid.

expand.grid(0:1, 0:1, 0:1)

Or, for the long case:

n <- 14
l <- rep(list(0:1), n)

expand.grid(l)

R - find all possible combinations of numbers WITH constraints on combination length

@chinsoon12 suggested the package RcppAlgos. I investigated it and found that the following works:

comboIter(1:10000, 8000)


Related Topics



Leave a reply



Submit