Group by Columns and Summarize a Column into a List

Group by columns and summarize a column into a list

We can use toString to concat the unique elements in 'cluster' together after grouping by 'client'

r1 <- sample_df %>% 
group_by(client, date) %>%
summarise(cluster = toString(unique(cluster)))

Or another option would be to create a list column

r2 <- sample_df %>%
group_by(client, date) %>%
summarise(cluster = list(unique(cluster)))

which we can unnest

library(tidyr)
r2 %>%
ungroup %>%
unnest()

Group by two column and summarize multiple columns

We can use summarise with across from dplyr version > = 1.00

library(dplyr)
df %>%
group_by(State, Date) %>%
summarise(across(everything(), sum, na.rm = TRUE), .groups = 'drop')
# A tibble: 6 x 4
# State Date Female Male
# <chr> <chr> <int> <int>
#1 Cali 05/06/2005 3 2
#2 Cali 10/06/2005 4 3
#3 NY 11/06/2005 10 5
#4 NY 12/06/2005 11 6
#5 Texas 01/01/2004 5 3
#6 Texas 02/01/2004 5 4

Or using aggregate from base R

aggregate(.~ State + Date, df, sum, na.rm = TRUE)

data

df <-  structure(list(State = c("Texas", "Texas", "Texas", "Cali", "Cali", 
"Cali", "Cali", "NY", "NY"), Female = c(2L, 3L, 5L, 1L, 2L, 3L,
1L, 10L, 11L), Male = c(2L, 1L, 4L, 1L, 1L, 1L, 2L, 5L, 6L),
Date = c("01/01/2004", "01/01/2004", "02/01/2004", "05/06/2005",
"05/06/2005", "10/06/2005", "10/06/2005", "11/06/2005", "12/06/2005"
)), class = "data.frame", row.names = c(NA, -9L))

How to group and summarise each data frame in a list of data frames

Using purrr::map and summarise at columns contain a letteral dot \\. using matches helper.

library(dplyr)
library(purrr)
map(all, ~.x %>%
#group_by_at(vars(matches('one$|two$|three$'))) %>% #column ends with one, two, or three
group_by_at(1) %>%
summarise_at(vars(matches('\\.')),sum))
#summarise_at(vars(matches('\\.')),list(sum=~sum))) #2nd option

[[1]]
# A tibble: 3 x 2
one one.1
<fct> <dbl>
1 blue 1
2 green 0
3 red 3

[[2]]
# A tibble: 4 x 2
two two.2
<fct> <dbl>
1 blue 1
2 green 1
3 red 1
4 yellow 1

[[3]]
# A tibble: 4 x 2
three three.3
<fct> <dbl>
1 blue 0
2 green 2
3 white 1
4 yellow 1

Group By - but sum one column, and show original columns

df_save =df_orig.loc[:, ["A", "C", "E"]]
df_agg = df_orig.groupby("A").agg({"B": "sum", "D" : "sum"}).reset_index()
df_merged = df_save.merge(df_agg)
for c in ["B", "D"] :
df_merged.loc[df_merged[c].duplicated(), c] = ''
















































ACEBD
AppleGreenX101
PearBrownY15523
PearYellowZ
BananaYellowP44
PlumRedR25

Group/summarize on nested tibble to append column lists

You can unlist the models to bring them in one vector and stire it in list again.

library(dplyr)
my_df %>% group_by(gear) %>% summarise(models = list(unlist(models)))

# gear models
# <dbl> <list>
#1 3 <chr [15]>
#2 4 <chr [12]>
#3 5 <chr [5]>

R - create summary table of means and counts by group for multiple columns

We may group by 'group' and summarise across the numeric columns to get the mean and the count of non-NA (sum(!is.na)

library(dplyr)
df %>%
group_by(group) %>%
summarise(across(where(is.numeric),
list(mean = ~ mean(.x, na.rm = TRUE), count = ~ sum(!is.na(.x)))))

Dplyr Summarise Groups as Column Names

There's lots of ways to go about it, but I would simplify it by pivoting to a longer data frame initially, and then grouping by var and group. Then you can just pivot wider to get the final result you want. Note that I used summarize(across()) which replaces the deprecated summarize_all(), even though with a single column could've just manually specified Mean = ... and Sum = ....

set.seed(123)

test_df %>%
pivot_longer(
var1:var2,
names_to = "var"
) %>%
group_by(Group, var) %>%
summarize(
across(
everything(),
list(Mean = mean, Sum = sum),
.names = "{.fn}"
),
.groups = "drop"
) %>%
pivot_wider(
names_from = "Group",
values_from = c(Mean, Sum),
names_glue = "{Group}_{.value}"
)
#> # A tibble: 2 × 7
#> var A_Mean B_Mean C_Mean A_Sum B_Sum C_Sum
#> <chr> <dbl> <dbl> <dbl> <int> <int> <int>
#> 1 var1 1 2.5 3.2 1 10 16
#> 2 var2 5 4.5 4.4 5 18 22


Related Topics



Leave a reply



Submit