Apply tidyr::separate over multiple columns

You could feed a customized separate_() call into Reduce().

# Split one column of a data frame into one new column per digit group.
#
# Meant to be used as the `f` of Reduce(): it is then called as
# sep(data, column_name), and all arguments are forwarded to
# tidyr::separate_().
#   ... : first element is the data frame, second is the name of the column
#         to split (a string); anything further is passed on to separate_().
# Returns the result of the separate_() call.
sep <- function(...) {
  dots <- list(...)
  # Count the digit groups in every row and size `into` for the longest row;
  # `1:n` on the per-row count vector would silently use only its first value.
  n <- max(stringr::str_count(dots[[1]][[dots[[2]]]], "\\d+"), na.rm = TRUE)
  # "_col_" yields the a_col_1, a_col_2, ... names shown in the example output.
  separate_(..., into = sprintf("%s_col_%d", dots[[2]], seq_len(n)))
}

df %>% Reduce(f = sep, x = c("a", "b"))
# a_col_1 a_col_2 a_col_3 b_col_1 b_col_2 b_col_3
# 1 5312 2020 1212 345 982 284

Otherwise, cSplit will do it too.

splitstackshape::cSplit(df, names(df))
# a_1 a_2 a_3 b_1 b_2 b_3
# 1: 5312 2020 1212 345 982 284

Tidy method to split multiple columns using tidyr::separate

Could try:


names(df) %>%
  # For every column name, split that single column on " of " into an
  # "<name>_attempted" and a "<name>_landed" column.
  map(function(x) {
    df %>%
      select(x) %>%
      separate(x,
        into = paste0(x, c("_attempted", "_landed")),
        sep = " of ")
  }) %>%
  # Last step: bind the per-column results back together side by side.
  bind_cols()


# A tibble: 6 x 10
A_attempted A_landed B_attempted B_landed C_attempted C_landed D_attempted D_landed E_attempted E_landed
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 3 5 2 2 10 21 0 0 8 16
2 1 2 2 4 3 14 0 0 3 15
3 1 3 0 1 11 34 0 0 10 32
4 1 3 0 0 10 35 0 0 6 28
5 3 4 0 0 16 53 0 0 13 49
6 2 7 0 0 17 62 0 0 9 48

As OP suggests we can indeed avoid the last step with map_dfc:

# Same per-column split as above; map_dfc() column-binds the pieces itself,
# so no separate bind_cols() step is needed.
names(df) %>%
  map_dfc(~ df %>%
    select(.x) %>%
    separate(.x,
      into = paste0(.x, c("_attempted", "_landed")),
      sep = " of "))

tidyr: separate column while retaining delimiter in the first column

You can use tidyr::extract with capture groups.

tidyr::extract(duplicates, sample, c("strain", "sample"), '(.*_)(\\w+)')

# strain sample
#1 a_1_ b1
#2 a1_2_ b1
#3 a1_c_1_ b2

The same regex can also be used with strcapture in base R -

strcapture('(.*_)(\\w+)', duplicates$sample, 
proto = list(strain = character(), sample = character()))

How to use separate in tidyverse to split a column?

We can use the `extra` argument. Also, by default `sep` is interpreted as a regular expression - according to the ?separate documentation:

sep - If character, sep is interpreted as a regular expression. The default value is a regular expression that matches any sequence of non-alphanumeric values.

and . is a metacharacter which can match any character. Therefore, we may need to either escape (\\.) or place it in square brackets ([.]). Also, based on the dput, the column is a list, which should be unnested first before doing the separate

jimma3 %>%
  select(Enterdateofexam2, Enterdayofexam, UniqueKey, MEDICALRECORD) %>%
  # The date column is a list column, so flatten it before splitting.
  unnest(Enterdateofexam2) %>%
  # Escape the dot so it is matched literally rather than as "any character".
  separate(Enterdateofexam2, into = c("day", "month"),
    sep = "\\.", convert = TRUE, extra = "merge") %>%
  # NOTE(review): the final step was missing from this snippet; na.omit() is
  # assumed because it drops the rows coming from the empty "" list elements
  # and leaves the 6 rows shown in the output below - confirm.
  na.omit()


# A tibble: 6 x 5
day month Enterdayofexam UniqueKey MEDICALRECORD
<int> <int> <chr> <chr> <chr>
1 7 6 1 530 577207
2 8 6 2 530 577207
3 9 6 3 530 577207
4 2 12 1 531 575333
5 3 12 2 531 575333
6 4 12 3 531 575333

Basically, with sep = ".", the unescaped dot is a regex that matches any character, so the value is split at every single character, which is why the warning popped up.


jimma3 <- structure(list(Enterdateofexam2 = list(c("", "7.06"), c("", "8.06"
), c("", "9.06"), c("", "2.12"), c("", "3.12"), c("", "4.12")),
Enterdayofexam = c("1", "2", "3", "1", "2", "3"), UniqueKey = c("530",
"530", "530", "531", "531", "531"), MEDICALRECORD = c("577207",
"577207", "577207", "575333", "575333", "575333")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))

Split multiple columns into multiple columns using r

I suggest a reshape2 solution that works without knowing the number of parts in advance:

> dput(pz1)
structure(list(id = c("HG00096", "HG00097", "HG00098", "HG00099"
), sub = c("GBR", "GBR", "GBR", "GBR"), HLA_A1 = c("01:01:01:01/01:01:01:02N",
"03:01:01:01/03:01:01:02N", "01:01:01:01/01:01:01:02N/01:22N",
"03:01:01:01"), HLA_A2 = c("29:02:01", "30:08:01", "29:02:01",
"30:08:01"), HLA_B1 = c("08:01:01/08:19N", "09:02:01/08:19N",
"08:01:01/08:19N", "09:02:01/08:19N"), HLA_B2 = c("44:03:01/44:03:03/44:03:04",
"44:03:01/44:03:03/44:03:04", "44:03:01/44:03:03/44:03:04", "44:03:01/44:03:03/44:03:04"
), HLA_C1 = c("07:01:01/07:01:02", "07:01:01/07:01:02", "07:09:01/07:01:02",
"07:08:01/07:01:02")), .Names = c("id", "sub", "HLA_A1", "HLA_A2",
"HLA_B1", "HLA_B2", "HLA_C1"), row.names = c(NA, -4L), class = "data.frame")

add this function:

library(reshape2)

# Split one "/"-delimited column of `df` into as many columns as its longest
# entry has parts.
#   df  : a data frame.
#   col : the column to split, given as a position or as a column name.
# Returns the data frame built by reshape2::colsplit(), with columns named
# <column>_A, <column>_B, ... (one letter per part).
getIt <- function(df, col) {
  # `[[` extracts a plain vector from data frames and tibbles alike;
  # as.character() lets factor columns through strsplit().
  values <- as.character(df[[col]])
  # Largest number of "/"-separated parts in any row (at least one column).
  n_parts <- max(lengths(strsplit(values, split = "/", fixed = TRUE)), 1L)
  # Accept the column by name as well as by position.
  base_name <- if (is.character(col)) col else names(df)[col]
  colsplit(
    string = values,
    pattern = "/",
    names = paste0(base_name, "_", LETTERS[seq_len(n_parts)])
  )
}

after you have this function you can easily do:

> getIt(pz1,3)
  HLA_A1_A HLA_A1_B HLA_A1_C
1 01:01:01:01 01:01:01:02N
2 03:01:01:01 03:01:01:02N
3 01:01:01:01 01:01:01:02N 01:22N
4 03:01:01:01

and a simple cbind with the original dataframe (with or without the original columns) :

> cbind(pz1[,1:2],getIt(pz1,3),getIt(pz1,4),getIt(pz1,5),getIt(pz1,6))
  id sub HLA_A1_A HLA_A1_B HLA_A1_C HLA_A2_A HLA_B1_A HLA_B1_B HLA_B2_A HLA_B2_B HLA_B2_C
1 HG00096 GBR 01:01:01:01 01:01:01:02N 29:02:01 08:01:01 08:19N 44:03:01 44:03:03 44:03:04
2 HG00097 GBR 03:01:01:01 03:01:01:02N 30:08:01 09:02:01 08:19N 44:03:01 44:03:03 44:03:04
3 HG00098 GBR 01:01:01:01 01:01:01:02N 01:22N 29:02:01 08:01:01 08:19N 44:03:01 44:03:03 44:03:04
4 HG00099 GBR 03:01:01:01 30:08:01 09:02:01 08:19N 44:03:01 44:03:03 44:03:04

How do I separate a string with different (& repeated) separators into multiple columns?

many good answers, one other variation below

# replace all punctuation with a space, then separate
df %>%
mutate(game=str_replace_all(game,"[:punct:]"," ")) %>%
separate(col = game,into = c("year", "day", "month", "monthday", "site", "team", "decision", "runs1", "runs2"))

Separate a String using Tidyr's separate into Multiple Columns and then Create a New Column with Counts

We can try with str_count

df %>%
  # Keep the original Goal column for now so the separators can be counted.
  separate(Goal, paste0("Goal", 1:4), sep = ",", remove = FALSE) %>%
  # Number of goals = number of commas + 1.
  mutate(Count = str_count(Goal, ",") + 1) %>%
  # Drop the original column once it has been counted.
  select(-Goal)
# Name Goal1 Goal2 Goal3 Goal4 Count
# <chr> <chr> <chr> <chr> <chr> <dbl>
#1 John Go back to school Learn to drive Learn to cook <NA> 3
#2 Chris Go back to school Get a job Learn a new Skill Learn to cook 4
#3 Andy Learn to drive Learn to Cook <NA> <NA> 2

using separate_rows in tidyr over many columns when some columns do not have delimiters

Ok, the easiest solution might be installing the development version of tidyr, since it seems to be fixed there. Use devtools::install_github("tidyverse/tidyr") to achieve that.

However, for a workaround for those who can't update or don't want to use a prerelease version of the package, we can count the required number of separators in each row and fill the missing values in the columns with separators. That lets separate_rows work and creates empty strings, which we then replace back with NA.

A <- c("Acura", "BMW", "Toyota", NA)
B <- c("1993;2004;2010", "2013", "2003;2011", NA)
C <- c("Blue;Black;Gold", "Silver", NA, NA)
df <- data.frame(A = A, B = B, C = C, stringsAsFactors = FALSE)

# Workaround: give NA cells as many ";" as column B has in the same row so
# that separate_rows() sees the same number of pieces in B and C.
df %>%
# seps = a string of ";" repeated once per separator found in B
mutate(seps = str_pad("", width = str_count(B, ";"), pad = ";")) %>%
# replace NA in B and C with that separator-only string
mutate_at(vars(B, C), ~ coalesce(., seps)) %>%
separate_rows(B, C, sep = ";") %>%
# the filler produced empty strings; turn them back into NA
# (the helper column `seps` is left in the result)
mutate_at(vars(B, C), ~ str_replace(., "^$", NA_character_))
#> A B C seps
#> 1 Acura 1993 Blue ;;
#> 2 Acura 2004 Black ;;
#> 3 Acura 2010 Gold ;;
#> 4 BMW 2013 Silver
#> 5 Toyota 2003 <NA> ;
#> 6 Toyota 2011 <NA> ;
#> 7 <NA> <NA> <NA> <NA>

Created on 2019-07-01 by the reprex package (v0.3.0)

tidyr use separate_rows over multiple columns

You can use a pipe. Note that sep = ", " is automatically detected.

d %>% separate_rows(b) %>% separate_rows(c)
# a b c
# 1 1 name1 name7
# 2 1 name2 name7
# 3 1 name3 name7
# 4 2 name4 name8
# 5 2 name4 name9
# 6 3 name5 name10
# 7 3 name6 name10

Note: Using tidyr version 0.6.0, where the %>% operator is included in the package.

Update: Using @doscendodiscimus comment, we could use a for() loop and reassign d in each iteration. This way we can have as many columns as we like. We will use a character vector of column names, so we'll need to switch to the standard evaluation version, separate_rows_.

# Split each listed column into rows in turn, reassigning `d` every iteration.
# separate_rows_() is the standard-evaluation variant, so the column can be
# passed as a string.
cols <- c("b", "c")
for (col in cols) {
  d <- separate_rows_(d, col)
}

which gives the updated d

  a     b      c
1 1 name1 name7
2 1 name2 name7
3 1 name3 name7
4 2 name4 name8
5 2 name4 name9
6 3 name5 name10
7 3 name6 name10

