How to Name the List of the Group_Split Output in Dplyr

How to name the list of the group_split output in dplyr

Lots of good answers. You can also just do:

# Draw five random rows from iris, then split them into a list with one
# element per level of Species (the levels become the list names).
iris %>%
  sample_n(size = 5) %>%
  {
    split(., f = as.factor(.$Species))
  }

Which will give you:

$setosa
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
4          5.5         3.5          1.3         0.2  setosa
5          5.3         3.7          1.5         0.2  setosa

$versicolor
  Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
3            5         2.3          3.3           1 versicolor

$virginica
  Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
1          7.7         2.6          6.9         2.3 virginica
2          7.2         3.0          5.8         1.6 virginica

Also works with your dataframe above:

# Split df into a list of data frames, one per distinct Cluster value;
# the Cluster values become the list names.
split(df, f = as.factor(df$Cluster))

Gives you:

$Cluster1
# A tibble: 1 x 6
  Cluster  gene_name    p_value morans_test_statistic morans_I    q_value
  <chr>    <chr>          <dbl>                 <dbl>    <dbl>      <dbl>
1 Cluster1 Grhpr     0.00000155                  4.66   0.0261 0.00000343

$Cluster11
# A tibble: 2 x 6
  Cluster   gene_name  p_value morans_test_statistic morans_I  q_value
  <chr>     <chr>        <dbl>                 <dbl>    <dbl>    <dbl>
1 Cluster11 Vimp      3.17e-62                 16.6    0.0948 1.62e-61
2 Cluster11 Fgfr1op2  2.07e- 8                  5.48   0.0310 4.98e- 8

$Cluster12
# A tibble: 1 x 6
  Cluster   gene_name p_value morans_test_statistic morans_I q_value
  <chr>     <chr>       <dbl>                 <dbl>    <dbl>   <dbl>
1 Cluster12 Pikfyve    0.0147                  2.18   0.0120  0.0245

$Cluster6
# A tibble: 1 x 6
  Cluster  gene_name  p_value morans_test_statistic morans_I  q_value
  <chr>    <chr>        <dbl>                 <dbl>    <dbl>    <dbl>
1 Cluster6 Zfp398    0.000354                  3.39   0.0188 0.000684

$Cluster8
# A tibble: 2 x 6
  Cluster  gene_name   p_value morans_test_statistic morans_I   q_value
  <chr>    <chr>         <dbl>                 <dbl>    <dbl>     <dbl>
1 Cluster8 Golga7    4.14e-  6                  4.46   0.0251 8.96e-  6
2 Cluster8 Lars2     3.93e-184                 28.9    0.165  3.48e-183

$Cluster9
# A tibble: 3 x 6
  Cluster  gene_name   p_value morans_test_statistic morans_I   q_value
  <chr>    <chr>         <dbl>                 <dbl>    <dbl>     <dbl>
1 Cluster9 Tbc1d8    3.47e- 47                  14.4   0.0815 1.58e- 46
2 Cluster9 H1f0      9.46e-131                  24.3   0.139  7.00e-130
3 Cluster9 Ankrd13a  1.43e- 38                  12.9   0.0737 5.96e- 38

How to name a list of a group_split in dplyr when grouped by more than one column

Use dplyr::group_keys() to get the grouping variables.

library(dplyr)
library(stringr)
# Group once, so the keys and the split pieces come from the same grouping.
iris_group <- group_by(iris, Species, Petal.Width)

# One row per group, plus a "<Species>-<Petal.Width>" label for that group.
group_name_df <- mutate(
  group_keys(iris_group),
  group_name = str_c(as.character(Species), "-", Petal.Width)
)

# Character vector of labels, in the same order as the group keys.
group_name <- group_name_df[["group_name"]]

# One tibble per group, with the labels attached as list names.
df_list <- setNames(group_split(iris_group), group_name)

> group_name_df
# A tibble: 27 x 3
   Species    Petal.Width group_name    
   <fct>            <dbl> <chr>         
 1 setosa             0.1 setosa-0.1    
 2 setosa             0.2 setosa-0.2    
 3 setosa             0.3 setosa-0.3    
 4 setosa             0.4 setosa-0.4    
 5 setosa             0.5 setosa-0.5    
 6 setosa             0.6 setosa-0.6    
 7 versicolor         1   versicolor-1  
 8 versicolor         1.1 versicolor-1.1
 9 versicolor         1.2 versicolor-1.2
10 versicolor         1.3 versicolor-1.3
# ... with 17 more rows

> df_list 
$`setosa-0.1`
# A tibble: 5 x 5
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
         <dbl>       <dbl>        <dbl>       <dbl> <fct>  
1          4.9         3.1          1.5         0.1 setosa 
2          4.8         3            1.4         0.1 setosa 
3          4.3         3            1.1         0.1 setosa 
4          5.2         4.1          1.5         0.1 setosa 
5          4.9         3.6          1.4         0.1 setosa 

$`setosa-0.2`
# A tibble: 29 x 5
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
.
.
.

How to name a list created by group_split WITHOUT breaking the pipeline

You can include group_keys() in the pipe like this:

library(dplyr)

# Stay inside the pipe: hand the grouped data to an anonymous function that
# splits it and names each piece with the first (here: only) key column.
iris %>%
  group_by(Species) %>%
  (function(grouped) {
    pieces <- group_split(grouped)
    labels <- group_keys(grouped)[[1]]
    setNames(pieces, labels)
  })

Output

$setosa
# A tibble: 50 x 5
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
 1          5.1         3.5          1.4         0.2 setosa 
 2          4.9         3            1.4         0.2 setosa 
 3          4.7         3.2          1.3         0.2 setosa 
 4          4.6         3.1          1.5         0.2 setosa 
 5          5           3.6          1.4         0.2 setosa 
 6          5.4         3.9          1.7         0.4 setosa 
 7          4.6         3.4          1.4         0.3 setosa 
 8          5           3.4          1.5         0.2 setosa 
 9          4.4         2.9          1.4         0.2 setosa 
10          4.9         3.1          1.5         0.1 setosa 
# … with 40 more rows

$versicolor
# A tibble: 50 x 5
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species   
          <dbl>       <dbl>        <dbl>       <dbl> <fct>     
 1          7           3.2          4.7         1.4 versicolor
 2          6.4         3.2          4.5         1.5 versicolor
 3          6.9         3.1          4.9         1.5 versicolor
 4          5.5         2.3          4           1.3 versicolor
 5          6.5         2.8          4.6         1.5 versicolor
 6          5.7         2.8          4.5         1.3 versicolor
 7          6.3         3.3          4.7         1.6 versicolor
 8          4.9         2.4          3.3         1   versicolor
 9          6.6         2.9          4.6         1.3 versicolor
10          5.2         2.7          3.9         1.4 versicolor
# … with 40 more rows

$virginica
# A tibble: 50 x 5
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species  
          <dbl>       <dbl>        <dbl>       <dbl> <fct>    
 1          6.3         3.3          6           2.5 virginica
 2          5.8         2.7          5.1         1.9 virginica
 3          7.1         3            5.9         2.1 virginica
 4          6.3         2.9          5.6         1.8 virginica
 5          6.5         3            5.8         2.2 virginica
 6          7.6         3            6.6         2.1 virginica
 7          4.9         2.5          4.5         1.7 virginica
 8          7.3         2.9          6.3         1.8 virginica
 9          6.7         2.5          5.8         1.8 virginica
10          7.2         3.6          6.1         2.5 virginica
# … with 40 more rows

output of group_split needs to be saved as separate dataframe

group_split() does not name the elements of the list it returns, so the year is not available as a list name. Use base::split() instead.

# Split the data into one data frame per Year; the list names are the years.
y <- split(year_x, year_x$Year)

# Write each piece to "<year>.csv" in the target folder.
year_names <- names(y)
for (idx in seq_along(y)) {
  out_path <- paste0("D:/newfolder/", year_names[idx], ".csv")
  write.csv2(y[[idx]], out_path, row.names = FALSE)
}

You could also do this with purrr::imap

# iwalk() is the side-effect variant of imap(): it calls the function with
# each element and its name, but returns the input invisibly instead of
# building (and printing) a list of NULLs from write.csv2().
purrr::iwalk(y, function(piece, year) {
  write.csv2(piece, paste0("D:/newfolder/", year, ".csv"), row.names = FALSE)
})

Using group_split, add a single value to each item in a list for looping and accumulating over

Perhaps we can loop over the list and turn each element into a named list that holds the data together with its minimum price:

library(purrr)
# For every element of mylist, keep the data and store the smallest value
# of its price column next to it.
out <- map(mylist, function(piece) {
  list(data = piece, min_price = min(piece$price))
})

Checking the result:

> out[[1]]$data
# A tibble: 163 x 10
   carat cut   color clarity depth table price     x     y     z
   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  0.75 Fair  D     SI2      64.6    57  2848  5.74  5.72  3.7 
 2  0.71 Fair  D     VS2      56.9    65  2858  5.89  5.84  3.34
 3  0.9  Fair  D     SI2      66.9    57  2885  6.02  5.9   3.99
 4  1    Fair  D     SI2      69.3    58  2974  5.96  5.87  4.1 
 5  1.01 Fair  D     SI2      64.6    56  3003  6.31  6.24  4.05
 6  0.73 Fair  D     VS1      66      54  3047  5.56  5.66  3.7 
 7  0.71 Fair  D     VS2      64.7    58  3077  5.61  5.58  3.62
 8  0.91 Fair  D     SI2      62.5    66  3079  6.08  6.01  3.78
 9  0.9  Fair  D     SI2      65.9    59  3205  6     5.95  3.94
10  0.9  Fair  D     SI2      66      58  3205  6     5.97  3.95
# … with 153 more rows
> out[[1]]$min_price
[1] 536

How to split R dataframe into list based on character variable

You can use is.na() on the second column to detect where each new block starts. Then cumsum(is.na()) will give you an id for each block. Group on this id, not on the contest name.

# Reshape one block into long format.
# `x` must contain a `block_id` column; once the header row has been promoted
# it must also have a `Precinct` column (renamed to `precinct`).
e_pivot <- function(x) {
    # Drop the grouping helper, discard the block's first row, and use the
    # next row as the column names.
    block <- select(x, -block_id)
    block <- slice(block, -1)
    block <- row_to_names(block, row_number = 1)

    # Columns 3 onward are stacked into candidate/votes pairs.
    long <- pivot_longer(
        block,
        cols = 3:last_col(),
        names_to = "candidate",
        values_to = "votes"
    )

    # Column 1 is renamed to `contest`; votes are converted to numeric.
    long <- rename(long, contest = 1, precinct = Precinct)
    mutate(long, votes = as.numeric(votes))
}

# Tag every row with a contest label and a running block id, then reshape
# each block separately with e_pivot() and stack the results.
t %>%
    mutate(
        # Keep V1 as the contest label only where it contains a space;
        # all other rows get NA.
        contest = if_else(str_detect(V1, " "), V1, NA_character_),
        # The counter increases by one at every row where V8 is NA.
        block_id = cumsum(is.na(V8)),
        # Place both new columns in front of V1.
        .before = V1
    ) %>%
    # Carry each contest label down over the NA rows that follow it.
    fill(contest, .direction = "down") %>%
    group_by(block_id) %>%
    # One tibble per block_id.
    group_split() %>%
    # Reshape each block and row-bind the results.
    map_dfr(e_pivot)

How to Name the List of the Group_Split Output in Dplyr