all combinations/crossing by group
I arrived at the following solution; maybe someone else can benefit from it.
# Split `df` into a list holding one data frame per value of `Group`.
x <- df %>%
  group_by(Group) %>%
  group_split()

# For each group, cross that group's distinct Country and Product values and
# row-bind the per-group results into a single data frame.
# Mapping over `x` itself (rather than over 1:length(x)) also works when the
# split is empty, where 1:length(x) would index the non-existent x[[1]].
# NOTE(review): crossing() is presumably tidyr's -- confirm tidyr is attached.
purrr::map_dfr(x, function(grp) {
  crossing(
    Group = unique(grp$Group),
    Country = unique(grp$Country),
    Product = unique(grp$Product)
  )
})
Determining All Combinations but With a Grouping Variable
You could try
# One row per combination that takes a single value from each element of
# `Input`.
lst <- expand.grid(Input)

# Every candidate result has as many rows as the shortest element of `Input`.
minlen <- min(lengths(Input))

# Try every set of `minlen` rows of `lst` and keep the sets in which no column
# repeats a value. Rejected sets yield NULL and are removed by Filter().
res <- Filter(
  Negate(is.null),
  combn(
    seq_len(nrow(lst)),
    minlen,
    function(idx) {
      # drop = FALSE keeps a data frame even when `Input` has one element;
      # without it the subset collapses to a vector and the check below fails.
      rows <- lst[idx, , drop = FALSE]
      # anyDuplicated() is 0 for a column without repeats. Checking column by
      # column avoids apply()'s coercion of the whole data frame to a matrix.
      if (all(vapply(rows, anyDuplicated, integer(1)) == 0L)) {
        rows
      }
    },
    simplify = FALSE
  )
)
which gives
> res
[[1]]
Var1 Var2 Var3
1 1 3 5
8 2 4 6
[[2]]
Var1 Var2 Var3
1 1 3 5
12 2 4 7
[[3]]
Var1 Var2 Var3
2 2 3 5
7 1 4 6
[[4]]
Var1 Var2 Var3
2 2 3 5
11 1 4 7
[[5]]
Var1 Var2 Var3
3 1 4 5
6 2 3 6
[[6]]
Var1 Var2 Var3
3 1 4 5
10 2 3 7
[[7]]
Var1 Var2 Var3
4 2 4 5
5 1 3 6
[[8]]
Var1 Var2 Var3
4 2 4 5
9 1 3 7
[[9]]
Var1 Var2 Var3
5 1 3 6
12 2 4 7
[[10]]
Var1 Var2 Var3
6 2 3 6
11 1 4 7
[[11]]
Var1 Var2 Var3
7 1 4 6
10 2 3 7
[[12]]
Var1 Var2 Var3
8 2 4 6
9 1 3 7
count number of combinations by group
Create a "combination" column in summarise(); we can count this column afterwards.
An easy way to make the combinations comparable is to sort the categories at the beginning, so that every id lists its categories in the same order.
library(dplyr)
# Count how many ids share each set of categories.
# Rows are sorted by id and cat first, so each id's categories are pasted in
# one consistent order and equal sets produce equal strings.
dd %>%
  arrange(id, cat) %>%
  group_by(id) %>%
  summarise(
    combination = paste(cat, collapse = "-"),
    .groups = "drop"
  ) %>%
  count(combination)
# A tibble: 3 x 2
combination n
<chr> <int>
1 c-d-f 1
2 c-f 2
3 d-f 2
How to get all combinations of 2 from a grouped column in a data frame
You can do :
library(dplyr)
# For each col1 group, list every unordered pair of its col2 values.
# t(combn(col2, 2)) is a two-column matrix with one row per pair, so `col2`
# in the result is a matrix column and each group contributes several rows.
data <- summarise(
  group_by(input, col1),
  col2 = t(combn(col2, 2))
)

# Spread the matrix column into ordinary columns next to the group key
# (the first column of `data`).
cbind(data[1], data.frame(data[["col2"]]))
# col1 X1 X2
# <dbl> <chr> <chr>
#1 1 A B
#2 1 A C
#3 1 B C
#4 2 E F
Create combinations by group and sum
You can create pairwise indices using combn() and expand the data frame with these using slice(). Then just group by these row pairs and summarise. I'm assuming you want pairwise combinations but this can be adapted for larger sets if needed. Some code to handle groups < 2 is included but can be removed if these don't exist in your data.
library(dplyr)
library(purrr)
# Append, to the original rows, one summed row per pair of rows within each id.
df1 %>%
  group_by(id) %>%
  # Repeat the rows of each group once per pair they belong to; min(n(), 2)
  # makes a one-row group produce a single "combination" of size 1.
  slice(as.vector(combn(seq(n()), min(n(), 2)))) %>%
  # Consecutive rows two at a time form one pair; id2 numbers the pairs.
  mutate(id2 = (row_number() - 1) %/% 2) %>%
  group_by(id, id2) %>%
  summarise(
    name = paste(name, collapse = ", "),
    across(where(is.numeric), sum),
    .groups = "drop"
  ) %>%
  select(-id2) %>%
  # Put the original rows first, but only for groups with more than one row:
  # a one-row group is already present in the summarised part.
  bind_rows(filter(group_by(df1, id), n() > 1), .) %>%
  arrange(id) %>%
  ungroup()
# A tibble: 6 × 4
id name number value
<chr> <chr> <int> <int>
1 a bob 1 1
2 a jane 2 2
3 a bob, jane 3 3
4 b mark 1 1
5 b brittney 2 2
6 b mark, brittney 3 3
Edit:
To adapt for all possible combinations you can iterate over the values up to the max group size. Using edited data which has a couple of rows added to the first group:
# Repeat the per-id combination sums for every subset size, from 1 up to the
# row count of the largest id group, and stack the results.
map_df(
  seq(max(table(df2$id))),
  function(subset_size) {
    df2 %>%
      group_by(id) %>%
      # subset_size * (subset_size <= n()) is 0 for a group with fewer rows
      # than the requested size, so combn() is asked for subsets of size 0.
      slice(
        as.vector(
          combn(seq(n()), subset_size * (subset_size <= n()))
        )
      ) %>%
      # Every run of `subset_size` consecutive rows is one combination.
      mutate(id2 = (row_number() - 1) %/% subset_size) %>%
      group_by(id, id2) %>%
      summarise(
        name = paste(name, collapse = ", "),
        across(where(is.numeric), sum),
        .groups = "drop"
      )
  }
) %>%
  select(-id2) %>%
  arrange(id)
# A tibble: 18 × 4
id name number value
<chr> <chr> <int> <int>
1 a bob 1 1
2 a jane 2 2
3 a sophie 1 1
4 a jeremy 2 2
5 a bob, jane 3 3
6 a bob, sophie 2 2
7 a bob, jeremy 3 3
8 a jane, sophie 3 3
9 a jane, jeremy 4 4
10 a sophie, jeremy 3 3
11 a bob, jane, sophie 4 4
12 a bob, jane, jeremy 5 5
13 a bob, sophie, jeremy 4 4
14 a jane, sophie, jeremy 5 5
15 a bob, jane, sophie, jeremy 6 6
16 b mark 3 5
17 b brittney 4 6
18 b mark, brittney 7 11
Data for df2:
# Example data: six rows in two id groups ("a" with four rows, "b" with two),
# with character id/name columns and integer number/value columns.
df2 <- data.frame(
  id = c("a", "a", "a", "a", "b", "b"),
  name = c("bob", "jane", "sophie", "jeremy", "mark", "brittney"),
  number = c(1L, 2L, 1L, 2L, 3L, 4L),
  value = c(1L, 2L, 1L, 2L, 5L, 6L),
  stringsAsFactors = FALSE
)
Related Topics
Replace Missing Values (Na) in One Data Set with Values from Another Where Columns Match
R: Using a String as an Argument to Mutate Verb in Dplyr
Ggplot2: Reorder Bars from Highest to Lowest in Each Facet
Accessing Columns in Data.Table Using a Character Vector of Column Names
Dplyr Piping Data - Difference Between '.' and '.X'
Replace Multiple Values in a Column for a Single One
Display Row Names in a Data.Table Object
A Way to Always Dodge a Histogram
Generating All Permutations of N Balls in M Bins
Error with Select Function from Dplyr
Create Sequential Counter That Restarts on a Condition Within Panel Data Groups
"Set Difference" Between Two Vectors with Duplicate Values
Set the Order of a Stacked Bar Chart by the Value of One of the Variables
Xpath and Namespace Specification for Xml Documents with an Explicit Default Namespace