How to Split Data Frame by Column Names in R

split data frame with recurring column names

One way is to find the rows where the header repeats (i.e. where the value in the first column equals that column's name) and split on those. We can use group_split to split the data into a list of data frames, map_at to drop the first row of every list element except the first (in those elements the first row is the repeated header), and type.convert to convert the columns to their appropriate classes.

library(dplyr)
library(purrr)

# Split `df` wherever the header row repeats inside the data.
# `cumsum(A == names(.)[1])` goes up by one at every row whose `A` value equals
# the first column name, so each repeated header row opens a new group id.
temp <- df %>%
  group_split(id = cumsum(A == names(.)[1]) + 1) %>%
  # Every group except the first starts with the repeated header row; drop it.
  map_at(-1, tail, -1) %>%
  # Pass `as.is` explicitly: leaving it out makes type.convert() warn on
  # R >= 4.0, and TRUE keeps non-numeric columns as character, not factor.
  map(type.convert, as.is = TRUE)
temp

#[[1]]
# A tibble: 3 x 5
#     A     B     C     D     id
#   <dbl> <dbl> <dbl> <dbl> <int>
#1 0.668 0.411 0.553 0.477     1
#2 0.794 0.821 0.530 0.732     1
#3 0.108 0.647 0.789 0.693     1

#[[2]]
# A tibble: 6 x 5
#    A     B     C     D    id
#  <dbl> <dbl> <dbl> <dbl> <int>
#1 0.724 0.783 0.023 0.478     2
#2 0.861 0.099 0.407 0.332     2
#3 0.438 0.316 0.913 0.651     2
#4 0.245 0.519 0.294 0.258     2
#5 0.07  0.662 0.459 0.479     2
#6 0.766 0.839 0.892 0.961     2

#[[3]]
# A tibble: 4 x 5
#      A     B     C     D    id
#  <dbl> <dbl> <dbl> <dbl> <int>
#1 0.084 0.347 0.864 0.435     3
#2 0.875 0.334 0.39  0.713     3
#3 0.339 0.476 0.777 0.4       3
#4 0.084 0.347 0.864 0.435     3

Using the same logic in base R, we can do

# Tag each row with a group id that goes up by one at every repeated header row
# (a row whose `A` value equals the name of the first column).
df$id <- cumsum(df$A == names(df)[1]) + 1
temp <- split(df, df$id)
# Every group after the first starts with the repeated header row; drop it.
temp[-1] <- lapply(temp[-1], tail, -1)
# Pass `as.is` explicitly: leaving it out makes type.convert() warn on
# R >= 4.0, and TRUE keeps non-numeric columns as character, not factor.
temp <- lapply(temp, type.convert, as.is = TRUE)

If you need them as separate data frames in the global environment, run:

# Label the pieces df1, df2, ... and copy each one into the global environment
# as a variable of that name.
temp <- setNames(temp, paste0("df", seq_along(temp)))
list2env(temp, envir = .GlobalEnv)

data

# Example input: a 15-row data frame with four factor columns A, B, C and D.
# Rows 4 and 11 hold the column names themselves ("A", "B", "C", "D") as data,
# i.e. the header row is repeated inside the table; the remaining rows hold
# numbers stored as factor labels.
df <- structure(list(A = structure(c(7L, 10L, 3L, 13L, 8L, 11L, 6L, 
4L, 1L, 9L, 13L, 2L, 12L, 5L, 2L), .Label = c("0.070", "0.084", 
"0.108", "0.245", "0.339", "0.438", "0.668", "0.724", "0.766", 
"0.794", "0.861", "0.875", "A"), class = "factor"), B = structure(c(5L, 
11L, 8L, 13L, 10L, 1L, 2L, 7L, 9L, 12L, 13L, 4L, 3L, 6L, 4L), .Label = c("0.099", 
"0.316", "0.334", "0.347", "0.411", "0.476", "0.519", "0.647", 
"0.662", "0.783", "0.821", "0.839", "B"), class = "factor"), 
 C = structure(c(7L, 6L, 9L, 13L, 1L, 4L, 12L, 2L, 5L, 11L, 
13L, 10L, 3L, 8L, 10L), .Label = c("0.023", "0.294", "0.390", 
"0.407", "0.459", "0.530", "0.553", "0.777", "0.789", "0.864", 
"0.892", "0.913", "C"), class = "factor"), D = structure(c(5L, 
11L, 9L, 13L, 6L, 2L, 8L, 1L, 7L, 12L, 13L, 4L, 10L, 3L, 
4L), .Label = c("0.258", "0.332", "0.400", "0.435", "0.477", 
"0.478", "0.479", "0.651", "0.693", "0.713", "0.732", "0.961", 
"D"), class = "factor")), class = "data.frame", row.names = c(NA, -15L))

Split Data Frame Into N Data Frames Based On Column Names

We can use split.default on the substring of names of the dataset

# Group the columns of `df1` by the part of each column name that precedes the
# first "-" (everything from the first "-" onward is stripped).
split.default(
  df1,
  f = sub(pattern = "-.*", replacement = "", x = names(df1))
)

Or if we know there would be only one character before -

# Group the columns of `df1` by the first character of each column name.
split.default(df1, f = substr(names(df1), start = 1, stop = 1))
#$A
#  A-DIODE A-DIODE.1
#1     1.2       1.3
#2     0.4       0.6

#$B
#  B-DIODE B-ACC1 B-ACC2 B-ANA0 B-ANA1 B-BRICKID B-CC0 B-CC1 B-DIGDN B-DIGDP B-DN1 B-DN2 B-DP1 B-DP2 B-SCL B-SDA B-USB0DN B-USB0DP
#1     1.4    1.5    1.6    1.7    1.8       1.9     2   2.1     2.2     2.3   2.4   2.5   2.6   2.7   2.8   2.9        3      3.1
#2     0.8    1.0    1.2    1.4    1.6       1.8     2   2.2     2.4     2.6   2.8   3.0   3.2   3.4   3.6   3.8        4      4.2
#  B-USB1DN B-USB1DP B-ACC1.1 B-ACC2.1 B-ANA0.1 B-ANA1.1 B-BRICKID.1 B-CC0.1 B-CC1.1 B-DIGDN.1 B-DIGDP.1 B-DN1.1 B-DN2.1 B-DP1.1 B-DP2.1
#1      3.2      3.3      3.4      3.5      3.6      3.7         3.8     3.9       4       4.1       4.2     4.3     4.4     4.5     4.6
#2      4.4      4.6      4.8      5.0      5.2      5.4         5.6     5.8       6       6.2       6.4     6.6     6.8     7.0     7.2
#  B-SCL.1 B-SDA.1 B-USB0DN.1 B-USB0DP.1 B-USB1DN.1 B-USB1DP.1 B-NA B-ACC2PWRLKG_0v4 B-ACC2PWRLKG_0v4.1 B-P_IN_Leak
#1     4.7     4.8        4.9          5        5.1        5.2  5.3              5.4                5.5         5.6
#2     7.4     7.6        7.8          8        8.2        8.4  8.6              8.8                9.0         9.2

Splitting a dataframe into multiple dataframes based on the column name in R

You could use split.default

# Group the columns of `df` so that each group ends at a column whose name
# starts with "REG". The running count of "REG" names (taken over all columns
# but the last) is shifted right by one via the leading 0, so the group id
# only goes up on the column *after* a "REG" column.
split.default(
  df,
  f = c(0, cumsum(grepl("^REG", head(names(df), -1))))
)

$`0`
        a       b      c   REG01
x  2844.8  1430.9  906.2  1871.0
y 10232.5 29263.6 6019.1 69618.7
z 20150.6 26334.5 6848.6 45032.2

$`1`
        d        e   REG02
x  2106.0   1818.8  1364.5
y 29929.6 232371.1 57561.7
z 58626.1  42713.6 20656.4

$`2`
        f   REG03
x   520.4  1821.4
y 46754.9 43862.3
z  9036.9 51876.1

R - split dataframe into list by column while retaining a second column, then rename list elements by col name

Get the dataframe in long format and use split

library(tidyverse)

# Stack every column whose name starts with "score" into name/value pairs,
# split the long table by the original column name, then drop the now
# redundant `name` column from each piece.
df %>%
  pivot_longer(cols = starts_with("score")) %>%
  split(.$name) %>%
  map(function(piece) select(piece, -name))

#$score1
# A tibble: 6 x 2
#  state  value
#  <chr>  <dbl>
#1 a      1.58 
#2 a      0.567
#3 b     -0.313
#4 b      0.756
#5 c      0.236
#6 c      1.05 

#$score2
# A tibble: 6 x 2
#  state value
#  <chr> <dbl>
#1 a      9.93
#2 a      9.96
#3 b     12.2 
#4 b      9.41
#5 c      9.40
#6 c      9.97

You can also use group_split and skip the map step, but the resulting list is unnamed (it has no score1, score2 element names).

# Same reshaping as above, but let group_split() do the splitting and drop the
# grouping column (`.keep = FALSE`) in one step.
df %>%
  pivot_longer(cols = starts_with("score")) %>%
  group_split(name, .keep = FALSE)

Match column names with another dataframe and split into separate dataframes

Try the following code:

library(dplyr)
library(tidyr)
#Code
# Reshape every column of `df` to long form (name/value), then look up each
# column name in `keys$sample_name` to bring in the matching columns of `keys`.
data <- left_join(
  pivot_longer(df, everything()),
  keys,
  by = c("name" = "sample_name")
)
# Split by site, drop the split column from each piece, and export each piece
# to the global environment under its site name.
List <- lapply(
  split(data, data$site_name),
  function(site_rows) {
    site_rows$site_name <- NULL
    site_rows
  }
)
list2env(List, envir = .GlobalEnv)

Outputs:

List
$chic_1
# A tibble: 2 x 2
  name       value
  <chr>      <dbl>
1 chic56.345   0.6
2 chic56.345   1.2

$tex_1
# A tibble: 2 x 2
  name      value
  <chr>     <dbl>
1 tex21.222   0.5
2 tex21.222   0.8

$wa_1
# A tibble: 2 x 2
  name     value
  <chr>    <dbl>
1 wa34.907  1.12
2 wa34.907  0.9

How to split a list of data frames based on its column names?

Create groups that you get by matching the names with match(), then use split().

# For each data frame in `mylist`, count how many of its column names occur in
# the character vector `names`. vapply() pins the result to one integer per
# element, where sapply() picks its return type from the input.
f <- vapply(mylist, function(x) sum(names(x) %in% names), integer(1))
# split() orders the groups by ascending match count, so the first label goes
# to the data frames with fewer matching column names.
listNew <- setNames(split(mylist, f), c("listB", "listA"))

Yielding

> str(listNew)
List of 2
 $ listB:List of 1
  ..$ :'data.frame':    15 obs. of  3 variables:
  .. ..$ v1 : int [1:15] 1 2 3 4 5 6 7 8 9 10 ...
  .. ..$ v2 : int [1:15] 20 21 22 23 24 25 26 27 28 29 ...
  .. ..$ v3b: Factor w/ 15 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ listA:List of 2
  ..$ :'data.frame':    15 obs. of  3 variables:
  .. ..$ v1: int [1:15] 1 2 3 4 5 6 7 8 9 10 ...
  .. ..$ v2: int [1:15] 20 21 22 23 24 25 26 27 28 29 ...
  .. ..$ v3: Factor w/ 15 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10 ...
  ..$ :'data.frame':    15 obs. of  3 variables:
  .. ..$ v1: int [1:15] 1 2 3 4 5 6 7 8 9 10 ...
  .. ..$ v2: int [1:15] 20 21 22 23 24 25 26 27 28 29 ...
  .. ..$ v3: Factor w/ 15 levels "a","b","c","d",..: 1 2 3 4 5 6 7 8 9 10 ...

R split column names with different occurrences of delimiter into strings and assign unique strings/string counts to a new dataframe

I think splitting at "underscore, digit, underscore" addresses what you described above. Note that this discards the digit and any information it carries. Does that matter?

names <- c(
  "strainA_1_batch1", "strainA_2_batch2", "strainB_1_batch1",
  "strainC_1_batch2", "strainC_2_batch2",
  "strainD_a_1_batch1", "strainD_b_1_batch1"
)

# Break each name at the "_<digit>_" separator; the digit itself is discarded.
splitList <- strsplit(names, "_\\d_")

# Stack the pieces row-wise: one row per name, one column per piece.
df <- data.frame(do.call(rbind, splitList))

# Label the two pieces.
names(df) <- c("Strain", "Batch")
df

# Report how often each strain and each batch occurs.
table(df$Strain)
table(df$Batch)

Another option is to replace the underscore on either side of the digit with a " " (or other character) and then split on the space.

# Turn the underscores on either side of a single digit into spaces, keeping
# the digit itself (captured as \\1).
names <- gsub(pattern = "_(\\d)_", replacement = " \\1 ", x = names)