Combinations by Group in R

all combinations/crossing by group

I arrived at the following solution; maybe someone else can benefit from it.

# Split the data into one tibble per Group.
x <- df %>%
  group_by(Group) %>%
  group_split()

# Build every Country x Product combination inside each group and row-bind
# the per-group results. Iterating over the group tibbles directly (instead
# of 1:length(x)) avoids the c(1, 0) index sequence an empty split would
# produce.
purrr::map_dfr(
  x,
  function(grp) {
    crossing(
      Group = unique(grp$Group),
      Country = unique(grp$Country),
      Product = unique(grp$Product)
    )
  }
)

Determining All Combinations but With a Grouping Variable

You could try

# Build every tuple that takes one value from each element of `Input`, then
# keep only the sets of `minlen` tuples in which no value is repeated within
# any column.
lst <- expand.grid(Input)
minlen <- min(lengths(Input))
res <- Filter(
  length, # rejected row sets come back as NULL (length 0) and are dropped
  combn(
    seq_len(nrow(lst)),
    minlen,
    function(rows) {
      # drop = FALSE keeps a data frame even when `Input` has a single
      # element; without it the subset collapses to a plain vector.
      picked <- lst[rows, , drop = FALSE]
      # Check each column as-is rather than coercing the data frame to a
      # matrix with apply(); anyDuplicated() returns 0 when nothing repeats.
      if (all(vapply(picked, anyDuplicated, integer(1)) == 0L)) {
        picked
      }
    },
    simplify = FALSE
  )
)

which gives

> res
[[1]]
Var1 Var2 Var3
1 1 3 5
8 2 4 6

[[2]]
Var1 Var2 Var3
1 1 3 5
12 2 4 7

[[3]]
Var1 Var2 Var3
2 2 3 5
7 1 4 6

[[4]]
Var1 Var2 Var3
2 2 3 5
11 1 4 7

[[5]]
Var1 Var2 Var3
3 1 4 5
6 2 3 6

[[6]]
Var1 Var2 Var3
3 1 4 5
10 2 3 7

[[7]]
Var1 Var2 Var3
4 2 4 5
5 1 3 6

[[8]]
Var1 Var2 Var3
4 2 4 5
9 1 3 7

[[9]]
Var1 Var2 Var3
5 1 3 6
12 2 4 7

[[10]]
Var1 Var2 Var3
6 2 3 6
11 1 4 7

[[11]]
Var1 Var2 Var3
7 1 4 6
10 2 3 7

[[12]]
Var1 Var2 Var3
8 2 4 6
9 1 3 7

count number of combinations by group

Create a "combination" column inside summarise(); we can then count the values in that column.

An easy way to make identical sets of categories comparable is to sort the categories first, so that the same set is always pasted together in the same order.

library(dplyr)

# Sort the categories within each id so that identical sets always produce
# the same label, then tally how many ids share each label.
dd %>%
  arrange(id, cat) %>%
  group_by(id) %>%
  summarise(combination = paste(cat, collapse = "-"), .groups = "drop") %>%
  count(combination)

# A tibble: 3 x 2
combination n
<chr> <int>
1 c-d-f 1
2 c-f 2
3 d-f 2

How to get all combinations of 2 from a grouped column in a data frame

You can do:

library(dplyr)

# Pairwise combinations of `col2` within each `col1` group.
data <- input %>%
  group_by(col1) %>%
  # A group with fewer than two rows has no pairs, and combn(x, 2) stops with
  # "n < m" on it, so such groups are dropped up front.
  filter(n() >= 2) %>%
  # t() turns combn()'s 2 x k result into one row per pair, stored as a
  # matrix column.
  # NOTE(review): dplyr >= 1.1 deprecates multi-row results from summarise()
  # in favour of reframe() -- switch once that version is guaranteed.
  summarise(col2 = t(combn(col2, 2)))
# Unpack the matrix column into ordinary columns (X1, X2).
cbind(data[1], data.frame(data$col2))

# col1 X1 X2
# <dbl> <chr> <chr>
#1 1 A B
#2 1 A C
#3 1 B C
#4 2 E F

Create combinations by group and sum

You can create pairwise indices using combn() and expand the data frame with these using slice(). Then just group by these row pairs and summarise. I'm assuming you want pairwise combinations but this can be adapted for larger sets if needed. Some code to handle groups < 2 is included but can be removed if these don't exist in your data.

library(dplyr)
library(purrr)

# For each id, enumerate the row pairs (or the lone row of a one-row group),
# collapse every pair into a single summary row, and append those summaries
# to the original rows of the groups that have more than one row.
df1 %>%
  group_by(id) %>%
  # combn() lists the index pairs column by column; flattening the matrix
  # puts the two rows of each pair next to each other in the slice.
  slice(as.vector(combn(seq_len(n()), min(n(), 2)))) %>%
  # Consecutive rows share an id2, which identifies the pair they belong to.
  mutate(id2 = (row_number() - 1) %/% 2) %>%
  group_by(id, id2) %>%
  summarise(
    name = toString(name),
    across(where(is.numeric), sum),
    .groups = "drop"
  ) %>%
  select(-id2) %>%
  bind_rows(filter(group_by(df1, id), n() > 1), .) %>%
  arrange(id) %>%
  ungroup()

# A tibble: 6 × 4
id name number value
<chr> <chr> <int> <int>
1 a bob 1 1
2 a jane 2 2
3 a bob, jane 3 3
4 b mark 1 1
5 b brittney 2 2
6 b mark, brittney 3 3

Edit:

To adapt for all possible combinations you can iterate over the values up to the max group size. Using edited data which has a couple of rows added to the first group:

# Repeat the combine-and-sum step for every subset size from 1 up to the
# size of the largest group, stacking the per-size results into one table.
map_dfr(seq_len(max(table(df2$id))), function(k) {
  df2 %>%
    group_by(id) %>%
    # A group smaller than k asks combn() for 0 elements, which selects no
    # rows, so that group contributes nothing at this size.
    slice(as.vector(combn(seq_len(n()), if (k <= n()) k else 0))) %>%
    # Every run of k consecutive rows forms one combination.
    mutate(id2 = (row_number() - 1) %/% k) %>%
    group_by(id, id2) %>%
    summarise(
      name = toString(name),
      across(where(is.numeric), sum),
      .groups = "drop"
    )
}) %>%
  select(-id2) %>%
  arrange(id)

# A tibble: 18 × 4
id name number value
<chr> <chr> <int> <int>
1 a bob 1 1
2 a jane 2 2
3 a sophie 1 1
4 a jeremy 2 2
5 a bob, jane 3 3
6 a bob, sophie 2 2
7 a bob, jeremy 3 3
8 a jane, sophie 3 3
9 a jane, jeremy 4 4
10 a sophie, jeremy 3 3
11 a bob, jane, sophie 4 4
12 a bob, jane, jeremy 5 5
13 a bob, sophie, jeremy 4 4
14 a jane, sophie, jeremy 5 5
15 a bob, jane, sophie, jeremy 6 6
16 b mark 3 5
17 b brittney 4 6
18 b mark, brittney 7 11

Data for df2:

# Example data: group "a" has four rows and group "b" has two.
df2 <- data.frame(
  id = c("a", "a", "a", "a", "b", "b"),
  name = c("bob", "jane", "sophie", "jeremy", "mark", "brittney"),
  number = c(1L, 2L, 1L, 2L, 3L, 4L),
  value = c(1L, 2L, 1L, 2L, 5L, 6L),
  stringsAsFactors = FALSE
)


Related Topics



Leave a reply



Submit