R: generate all permutations of vector without duplicated elements
Using gtools
package:
require(gtools)
permutations(n = 9, r = 3, v = 1:9)
# n -> size of source vector
# r -> size of target vector
# v -> source vector, defaults to 1:n
# repeats.allowed = FALSE (default)
R: generate all unique permutations from a vector
Use it like this:
library(RcppAlgos)
tab <- table(ex)
permuteGeneral(v = names(tab), freq = tab)
## [,1] [,2] [,3]
## [1,] "sp1" "sp2" "sp2"
## [2,] "sp2" "sp1" "sp2"
## [3,] "sp2" "sp2" "sp1"
How generate all posible combinations of a vector without repetition in R?
Here is a solution with combn
(credits to @Joseph Wood for making my answer much simpler):
as.numeric(unlist(sapply(vec, function(y) combn(vec, y, paste, collapse = ""))))
Result:
[1] 1 2 3 4 5 12 13 14 15 23 24 25 34 35
[15] 45 123 124 125 134 135 145 234 235 245 345 1234 1235 1245
[29] 1345 2345 12345
You can also make it a function:
all_combn = function(vec){
as.numeric(unlist(sapply(vec, function(y) combn(vec, y, paste, collapse = ""))))
}
Result:
> all_combn(1:5)
[1] 1 2 3 4 5 12 13 14 15 23 24 25 34 35
[15] 45 123 124 125 134 135 145 234 235 245 345 1234 1235 1245
[29] 1345 2345 12345
> all_combn(1:6)
[1] 1 2 3 4 5 6 12 13 14 15 16 23
[13] 24 25 26 34 35 36 45 46 56 123 124 125
[25] 126 134 135 136 145 146 156 234 235 236 245 246
[37] 256 345 346 356 456 1234 1235 1236 1245 1246 1256 1345
[49] 1346 1356 1456 2345 2346 2356 2456 3456 12345 12346 12356 12456
[61] 13456 23456 123456
How to create permutations of a logical vector?
You where missing repeats.allowed=T
:
gtools::permutations(2,2, c(T,F), repeats.allowed = T)
[,1] [,2]
[1,] FALSE FALSE
[2,] FALSE TRUE
[3,] TRUE FALSE
[4,] TRUE TRUE
You can make your custom function around permutations
:
my_permute <- function(vect, n, repeats = TRUE) {
gtools::permutations(length(vect), n, vect, repeats.allowed = repeats)
}
my_permute(vect=c(T,F), n=2)
Example with more elements:
my_permute(letters[1:3], n=3)
Generating all distinct permutations of a list in R
combinat::permn
will do that work:
> library(combinat)
> permn(letters[1:3])
[[1]]
[1] "a" "b" "c"
[[2]]
[1] "a" "c" "b"
[[3]]
[1] "c" "a" "b"
[[4]]
[1] "c" "b" "a"
[[5]]
[1] "b" "c" "a"
[[6]]
[1] "b" "a" "c"
Note that calculation is huge if the element is large.
Generate all unique combinations from a vector with repeating elements
Use combn()
with lapply()
should do the trick.
x <- c('red', 'blue', 'green', 'red', 'green', 'red')
lapply(1:3, function(y) combn(x, y))
# [[1]]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] "red" "blue" "green" "red" "green" "red"
# [[2]]
# [,1] [,2] [,3] [,4] [,5] [,6] ...
# [1,] "red" "red" "red" "red" "red" "blue" ...
# [2,] "blue" "green" "red" "green" "red" "green" ...
# [[3]]
# [,1] [,2] [,3] [,4] [,5] [,6] ...
# [1,] "red" "red" "red" "red" "red" "red" ...
# [2,] "blue" "blue" "blue" "blue" "green" "green" ...
# [3,] "green" "red" "green" "red" "red" "green" ...
All unique combinations
lapply(cc, function(y)
y[,!duplicated(apply(y, 2, paste, collapse="."))]
)
[[1]]
[1] "red" "blue" "green"
[[2]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] "red" "red" "red" "blue" "blue" "green" "green"
[2,] "blue" "green" "red" "green" "red" "red" "green"
[[3]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] ...
[1,] "red" "red" "red" "red" "red" "red" "blue" ...
[2,] "blue" "blue" "green" "green" "red" "red" "green" ...
[3,] "green" "red" "red" "green" "green" "red" "red" ...
Although strictly speaking those aren't all unique combinations, as some of them are permutations of each other.
Properly unique combinations
lapply(cc, function(y)
y[,!duplicated(apply(y, 2, function(z) paste(sort(z), collapse=".")))]
)
# [[1]]
# [1] "red" "blue" "green"
# [[2]]
# [,1] [,2] [,3] [,4] [,5]
# [1,] "red" "red" "red" "blue" "green"
# [2,] "blue" "green" "red" "green" "green"
# [[3]]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] "red" "red" "red" "red" "red" "blue"
# [2,] "blue" "blue" "green" "green" "red" "green"
# [3,] "green" "red" "red" "green" "red" "green"
R - Expand Grid Without Duplicates
In RcppAlgos
*, there is a function called comboGrid
that does the trick:
library(RcppAlgos) ## as of v2.4.3
comboGrid(X1, X2, X3, repetition = F)
# Var1 Var2 Var3
# [1,] "x" "A" "C"
# [2,] "x" "A" "G"
# [3,] "x" "A" "y"
# [4,] "x" "B" "C"
# [5,] "x" "B" "G"
# [6,] "x" "B" "y"
# [7,] "x" "C" "G"
# [8,] "x" "C" "y"
# [9,] "y" "A" "C"
# [10,] "y" "A" "G"
# [11,] "y" "B" "C"
# [12,] "y" "B" "G"
# [13,] "y" "C" "G"
# [14,] "z" "A" "C"
# [15,] "z" "A" "G"
# [16,] "z" "A" "y"
# [17,] "z" "B" "C"
# [18,] "z" "B" "G"
# [19,] "z" "B" "y"
# [20,] "z" "C" "G"
# [21,] "z" "C" "y"
Large Test
set.seed(42)
rnd_lst <- lapply(1:11, function(x) {
sort(sample(LETTERS, sample(26, 1)))
})
## Number of results that expand.grid would return if your machine
## had enough memory... over 300 trillion!!!
prettyNum(prod(lengths(rnd_lst)), big.mark = ",")
# [1] "365,634,846,720"
exp_grd_test <- expand.grid(rnd_lst)
# Error: vector memory exhausted (limit reached?)
system.time(cmb_grd_test <- comboGrid(rnd_lst, repetition=FALSE))
# user system elapsed
# 9.866 0.330 10.196
dim(cmb_grd_test)
# [1] 3036012 11
head(cmb_grd_test)
# Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9 Var10 Var11
# [1,] "A" "E" "C" "B" "D" "G" "F" "H" "J" "I" "K"
# [2,] "A" "E" "C" "B" "D" "G" "F" "H" "J" "I" "L"
# [3,] "A" "E" "C" "B" "D" "G" "F" "H" "J" "I" "M"
# [4,] "A" "E" "C" "B" "D" "G" "F" "H" "J" "I" "N"
# [5,] "A" "E" "C" "B" "D" "G" "F" "H" "J" "I" "O"
# [6,] "A" "E" "C" "B" "D" "G" "F" "H" "J" "I" "P"
* I am the author of RcppAlgos
Permute all unique enumerations of a vector in R
EDIT: Here's a faster answer; again based on the ideas of Louisa Grey and Bryce Wagner, but with faster R code thanks to better use of matrix indexing. It's quite a bit faster than my original:
> ddd <- c(1,0,3,4,1,0,0,3,0,4)
> system.time(up1 <- uniqueperm(d))
user system elapsed
0.183 0.000 0.186
> system.time(up2 <- uniqueperm2(d))
user system elapsed
0.037 0.000 0.038
And the code:
uniqueperm2 <- function(d) {
dat <- factor(d)
N <- length(dat)
n <- tabulate(dat)
ng <- length(n)
if(ng==1) return(d)
a <- N-c(0,cumsum(n))[-(ng+1)]
foo <- lapply(1:ng, function(i) matrix(combn(a[i],n[i]),nrow=n[i]))
out <- matrix(NA, nrow=N, ncol=prod(sapply(foo, ncol)))
xxx <- c(0,cumsum(sapply(foo, nrow)))
xxx <- cbind(xxx[-length(xxx)]+1, xxx[-1])
miss <- matrix(1:N,ncol=1)
for(i in seq_len(length(foo)-1)) {
l1 <- foo[[i]]
nn <- ncol(miss)
miss <- matrix(rep(miss, ncol(l1)), nrow=nrow(miss))
k <- (rep(0:(ncol(miss)-1), each=nrow(l1)))*nrow(miss) +
l1[,rep(1:ncol(l1), each=nn)]
out[xxx[i,1]:xxx[i,2],] <- matrix(miss[k], ncol=ncol(miss))
miss <- matrix(miss[-k], ncol=ncol(miss))
}
k <- length(foo)
out[xxx[k,1]:xxx[k,2],] <- miss
out <- out[rank(as.numeric(dat), ties="first"),]
foo <- cbind(as.vector(out), as.vector(col(out)))
out[foo] <- d
t(out)
}
It doesn't return the same order, but after sorting, the results are identical.
up1a <- up1[do.call(order, as.data.frame(up1)),]
up2a <- up2[do.call(order, as.data.frame(up2)),]
identical(up1a, up2a)
For my first attempt, see the edit history.
Related Topics
Showing Different Axis Labels Using Ggplot2 with Facet_Wrap
Barplot with 2 Variables Side by Side
How to Transpose a Dataframe in Tidyverse
How to Change X-Axis Tick Label Names, Order and Boxplot Colour Using R Ggplot
How to Preserve Base Data Frame Rownames Upon Filtering in Dplyr Chain
Parallel Execution of Random Forest in R
Alternatives to Nested Ifelse Statements in R
Calculate Sum of a List of Variables by Group
Fastest Way for Filling-In Missing Dates for Data.Table
R Convert Between Zoo Object and Data Frame, Results Inconsistent for Different Numbers of Columns
Arithmetic Operations on R Factors
Grouped Barplot with Cut Y Axis
Create Plots Based on Radio Button Selection R Shiny
R: How to Recode Multiple Variables at Once