Split Dataframe Using Two Columns of Data and Apply Common Transformation on List of Resulting Dataframes

Split a dataframe into a list of dataframes after each event

Suppose your data is in a data.frame called d:

# Flag every row whose _previous_ row has type "event", i.e. the first row
# after an event. default = "" pads the first row, so it is never flagged.
d$check_for_event <- dplyr::lag(d$type, default = "") == "event"
# The running total of the flags goes up by one right after each event, so
# all rows up to and including an event share the same group id.
d$cumu_check_for_event <- cumsum(d$check_for_event)
# Split into a list of data.frames, one per group id (named "0", "1", ...).
split(d, d$cumu_check_for_event)

$`0`
# A tibble: 4 x 4
  from     type  check_for_event cumu_check_for_event
  <chr>    <chr> <lgl>                          <int>
1 user     text  FALSE                              0
2 operator text  FALSE                              0
3 operator text  FALSE                              0
4 operator event FALSE                              0

$`1`
# A tibble: 3 x 4
  from     type  check_for_event cumu_check_for_event
  <chr>    <chr> <lgl>                          <int>
1 user     text  TRUE                               1
2 operator text  FALSE                              1
3 operator event FALSE                              1

$`2`
# A tibble: 3 x 4
  from     type  check_for_event cumu_check_for_event
  <chr>    <chr> <lgl>                          <int>
1 user     text  TRUE                               2
2 operator text  FALSE                              2
3 operator text  FALSE                              2

Match column names with another dataframe and split into separate dataframes

Try the following code:

library(dplyr)
library(tidyr)
# Reshape to long form (one name/value row per cell of df), then attach the
# matching row of keys by pairing name with keys' sample_name column.
data <- left_join(
  pivot_longer(df, everything()),
  keys,
  by = c("name" = "sample_name")
)
# One list element per site_name; the grouping column is removed from each
# piece because the element name already carries that information.
List <- lapply(
  split(data, data$site_name),
  function(site_rows) {
    site_rows[["site_name"]] <- NULL
    site_rows
  }
)
# Publish every element of List as a variable of the same name in the global
# environment.
list2env(List, envir = .GlobalEnv)

Outputs:

List
$chic_1
# A tibble: 2 x 2
  name       value
  <chr>      <dbl>
1 chic56.345   0.6
2 chic56.345   1.2

$tex_1
# A tibble: 2 x 2
  name      value
  <chr>     <dbl>
1 tex21.222   0.5
2 tex21.222   0.8

$wa_1
# A tibble: 2 x 2
  name     value
  <chr>    <dbl>
1 wa34.907  1.12
2 wa34.907  0.9

Split Dataframe into list of one-row Dataframes

A simple way would be to use the split() function built into R:

# Group by row position so that each row becomes its own one-row data frame.
# seq_len(nrow(df)) is empty for a zero-row df (1:length(...) would give
# c(1, 0)) and does not rely on any particular column being present.
split(df, seq_len(nrow(df)))

Because the grouping is by row position rather than by the values in a column, it is robust to duplicates in df$a.

Uniquely split using two columns (character variables)

You can combine the two columns into one and then use split on that column.

# Build one composite key per row by joining the P and S values with "_",
# and keep it on the data frame as the groups column.
df[["groups"]] <- paste(df$P, df$S, sep = "_")
# Collect the C values that belong to each distinct key into a named list.
split_data <- split(x = df$C, f = df[["groups"]])

Merge two dataframes and keep both columns in R

We can use transform() to add a copy of the key column to b before the merge and point by.y at that copy, so the original column is still present in the result:

# Join on a copy of b's sd.3 column (named sd.3t) so that sd.3 itself is
# still present in the merged result; all = TRUE keeps the unmatched rows
# from both a and b.
merge(
  a,
  transform(b, sd.3t = sd.3),
  by.x = "sd.3.IID",
  by.y = "sd.3t",
  all = TRUE
)

-output

   sd.3.IID       sd.3       sd.4       sd.5
1   0_62000_1  0_62000_1  0_62000_1  0_62000_1
2   0_62004_5  0_62004_5  0_62004_5  0_62004_5
3  0_62070_19 0_62070_19 0_62070_19 0_62070_19
4  0_62070_23 0_62070_23 0_62070_23 0_62070_23
5  0_62070_27 0_62070_27 0_62070_27 0_62070_27
6  0_62070_33 0_62070_33 0_62070_33 0_62070_33
7   ADNI_1092       <NA>       <NA>       <NA>
8   ADNI_1263       <NA>       <NA>       <NA>
9   ADNI_1283       <NA>       <NA>       <NA>
10  ADNI_1334       <NA>       <NA>       <NA>
11  0_62071_1  0_62071_1  0_62071_1  0_62071_1
12 0_62071_15 0_62071_15 0_62071_15 0_62071_15

Using map on specific column in list?

# Split df into one data frame per Species, then sort each piece by
# Sepal.Length in descending order, handing the piece to arrange() as .data.
df %>%
  group_split(Species) %>%
  map(function(piece) {
    arrange(.data = piece, desc(Sepal.Length))
  })

# Same result as passing .data explicitly: split df by Species, then pipe
# each piece into arrange() to sort it by descending Sepal.Length.
df %>%
  group_split(Species) %>%
  map(function(piece) {
    piece %>% arrange(desc(Sepal.Length))
  })