R - Pivoting Duplicate Rows into Multiple Column with Unknown Number of Columns

R - pivoting duplicate rows into multiple column with unknown number of columns

With the packages dplyr and tidyr you can use

library(dplyr)
library(tidyr)

# Number the rows within each company, then spread the `sic` codes into one
# column per row number (sic.1, sic.2, ...), however many there are.
df %>%
  group_by(companies) %>%
  mutate(row_n = row_number()) %>%
  pivot_wider(
    # `id_cols` must be named: since tidyr 1.2.0 `...` comes right after
    # `data`, so a positional `companies` is no longer matched to `id_cols`.
    id_cols = companies,
    names_from = row_n,
    values_from = sic,
    names_glue = "sic.{row_n}"
  )

Output

# A tibble: 2 x 4
# Groups: companies [2]
# companies sic.1 sic.2 sic.3
# <chr> <chr> <chr> <chr>
# 1 ABC Ltd 12345 24155 31231
# 2 Derwent plc 55346 34234 NA

Show duplicate value on a separate row in pivot wider

You can use -

library(dplyr)
library(tidyr)

# Widen by `id`, collecting duplicated cells into list-columns
# (values_fn = list), then unnest the CP1/CP2 list-columns back into rows.
df %>%
  pivot_wider(
    names_from = id,
    values_from = value,
    values_fn = list
  ) %>%
  unnest(cols = c(CP1, CP2))

# identifier label CP1 CP2
# <chr> <chr> <int> <int>
#1 e1 Monaco 0 1
#2 e1 became 0 0
#3 e2 the 1 1
#4 e2 first 0 0
#5 e1 the 10 NA
#6 e1 the 1 NA

You were close with your attempt as well; you just had to include id in group_by:

# Add a running index within each identifier/label/id combination so that
# repeated values land on separate rows after widening by `id`.
df %>%
  group_by(identifier, label, id) %>%
  mutate(rn = row_number()) %>%
  pivot_wider(
    names_from = id,
    values_from = value
  )

Pivoting data.frame with multiple columns per column

The data is all mixed up. If we have more such values we need to specify a way in which we can identify each group.

Here I have considered values with only characters in them as name, values with only numbers in them as year, you can add more such conditions if needed.

library(dplyr)
library(tidyr)

# Stack every column, classify each value by its content, then spread the
# values back out into one column per class.
df %>%
  pivot_longer(cols = everything(), names_to = "col") %>%
  mutate(
    col_name = case_when(
      grepl("^[A-Za-z]+$", value) ~ "name",  # letters only -> a name
      grepl("^[0-9]+$", value) ~ "year"      # digits only  -> a year
    )
  ) %>%
  pivot_wider(names_from = col_name, values_from = value) %>%
  # Re-infer column types from the character values.
  type.convert(as.is = TRUE)

# A tibble: 4 x 3
# col name year
# <int> <chr> <int>
#1 1 bob 2011
#2 2 sally 2012
#3 3 fred 2013
#4 4 jim 2014

Pivot data into two different columns simultaneously using pivot_longer() in R?

Edit

Turns out, you can do it in one pivot_longer:

# Split each column name at the dot: the first part is stored in `variable`,
# the second part (".value") names the output column receiving the values.
df %>%
  pivot_longer(
    -id,
    names_to = c("variable", ".value"),
    names_pattern = "(.*)\\.(.*)"
  ) %>%
  rename(
    activation = act,
    fixation = fix
  )

with the same result.


Don't know how to do it in one go, but you could use

library(tidyr)
library(dplyr)

# Two-step variant: go fully long first (variable + class), then spread the
# class back out into its own columns and give them descriptive names.
df %>%
  pivot_longer(
    -id,
    names_to = c("variable", "class"),
    names_pattern = "(.*)\\.(.*)"
  ) %>%
  pivot_wider(names_from = "class") %>%
  rename(
    activation = act,
    fixation = fix
  )

This returns

# A tibble: 4 x 4
id variable activation fixation
<dbl> <chr> <dbl> <dbl>
1 1 v1 0.4 1
2 1 v2 0.5 0
3 2 v1 0.8 0
4 2 v2 0.7 1

Reshaping data to wide format in R

Create a row number column for each id and reshape the data to wide format.

library(dplyr)
library(tidyr)

# Give each row a per-id sequence number, then use that number as the suffix
# of the widened columns (x_1, x_2, ..., stop_10).
df %>%
  group_by(id) %>%
  mutate(col = row_number()) %>%
  ungroup() %>%
  pivot_wider(
    names_from = col,
    values_from = x:stop
  )

# A tibble: 10 x 41
# id x_1 x_2 x_3 x_4 x_5 x_6 x_7 x_8 x_9 x_10
# <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 1 A B C D E F G H I J
# 2 2 A B C D E F G H I J
# 3 3 A B C D E F G H I J
# 4 4 A B C D E F G H I J
# 5 5 A B C D E F G H I J
# 6 6 A B C D E F G H I J
# 7 7 A B C D E F G H I J
# 8 8 A B C D E F G H I J
# 9 9 A B C D E F G H I J
#10 10 A B C D E F G H I J
# … with 30 more variables: y_1 <chr>, y_2 <chr>, y_3 <chr>,
# y_4 <chr>, y_5 <chr>, y_6 <chr>, y_7 <chr>, y_8 <chr>, y_9 <chr>,
# y_10 <chr>, start_1 <date>, start_2 <date>, start_3 <date>,
# start_4 <date>, start_5 <date>, start_6 <date>, start_7 <date>,
# start_8 <date>, start_9 <date>, start_10 <date>, stop_1 <date>,
# stop_2 <date>, stop_3 <date>, stop_4 <date>, stop_5 <date>,
# stop_6 <date>, stop_7 <date>, stop_8 <date>, stop_9 <date>,
# stop_10 <date>

Pivoting wide to long format and then nesting columns

A tidyverse approach to achieve your desired result may look like so:

library(tibble)

# Example input: one row per key; keys with levels use the `values.*` columns,
# keys without levels use the single `value` column.
df_1 <- tribble(
  ~key,          ~values.male, ~values.female, ~values.red, ~values.green, ~value,
  "gender",      0.5,          0.5,            NA,          NA,            NA,
  "age",         NA,           NA,             NA,          NA,            "50",
  "color",       NA,           NA,             TRUE,        FALSE,         NA,
  "time_of_day", NA,           NA,             NA,          NA,            "noon"
)

library(tidyr)
library(dplyr)
library(purrr)

# Coerce everything to character so all values fit in one column, go long,
# nest per key, and collapse level-less keys into a plain named vector.
df_pivoted <- df_1 %>%
  mutate(across(everything(), as.character)) %>%
  pivot_longer(
    -key,
    names_to = "level",
    names_prefix = "^values\\.",
    values_drop_na = TRUE
  ) %>%
  group_by(key) %>%
  nest() %>%
  mutate(
    data = map(data, function(d) {
      if (all(d$level == "value")) deframe(d) else d
    })
  )
df_pivoted
#> # A tibble: 4 x 2
#> # Groups: key [4]
#> key data
#> <chr> <list>
#> 1 gender <tibble [2 × 2]>
#> 2 age <chr [1]>
#> 3 color <tibble [2 × 2]>
#> 4 time_of_day <chr [1]>

EDIT Following the clarification in your comments on the desired result we could simply get rid of the map statement at the end (which basically was meant for converting the tibbles for categories without levels to a vector) and add a mutate statement before nesting to replace the level with NA for categories without a level:

#' Pivot all non-key columns to long form and nest them per key
#'
#' All columns are coerced to character so that values of different types can
#' share one `value` column. Column names have a leading "values." stripped
#' and become `level`; missing values are dropped. For keys whose only level
#' is the literal "value" (i.e. keys without levels), `level` is set to NA.
#'
#' @param x A data frame with a `key` column; every other column is pivoted.
#' @return A tibble grouped by `key` with a list-column `data` holding one
#'   (`level`, `value`) tibble per key.
pivot_nest <- function(x) {
  mutate(x, across(everything(), as.character)) %>%
    pivot_longer(
      -key,
      names_to = "level",
      names_prefix = "^values\\.",
      values_drop_na = TRUE
    ) %>%
    group_by(key) %>%
    # Scalar `if`, not `ifelse()`: with a length-1 test `ifelse()` returns only
    # the first element of `level`, which was then recycled over the group
    # (turning "female" into "male" and "green" into "red").
    mutate(level = if (all(level == "value")) NA_character_ else level) %>%
    nest()
}

df_pivoted <- df_1 %>%
  pivot_nest()
df_pivoted
#> # A tibble: 4 x 2
#> # Groups: key [4]
#> key data
#> <chr> <list>
#> 1 gender <tibble [2 × 2]>
#> 2 age <tibble [1 × 2]>
#> 3 color <tibble [2 × 2]>
#> 4 time_of_day <tibble [1 × 2]>
df_pivoted$data
#> [[1]]
#> # A tibble: 2 x 2
#> level value
#> <chr> <chr>
#> 1 male 0.5
#> 2 female 0.5
#>
#> [[2]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> 50
#>
#> [[3]]
#> # A tibble: 2 x 2
#> level value
#> <chr> <chr>
#> 1 red TRUE
#> 2 green FALSE
#>
#> [[4]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> noon

# Second example: an input that only has the level-less `value` column.
df_2 <- tribble(
  ~key,          ~value,
  "age",         "50",
  "income",      "100000",
  "time_of_day", "noon"
)

df_pivoted2 <- pivot_nest(df_2)
df_pivoted2
#> # A tibble: 3 x 2
#> # Groups: key [3]
#> key data
#> <chr> <list>
#> 1 age <tibble [1 × 2]>
#> 2 income <tibble [1 × 2]>
#> 3 time_of_day <tibble [1 × 2]>
df_pivoted2$data
#> [[1]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> 50
#>
#> [[2]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> 100000
#>
#> [[3]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> noon

Why does pivot_wider either read single values as duplicates or create a wide-and-long tibble (without merging rows)?

We could create a sequence column with rowid

library(dplyr)
library(tidyr)
library(data.table)
# `rowid()` (data.table) numbers the repeats of each Point/Species pair, which
# gives duplicated combinations distinct row keys before widening.
df1 %>%
  mutate(rn = rowid(Point, Species)) %>%
  pivot_wider(
    names_from = Species,
    values_from = Number,
    values_fill = list(Number = "0")
  )

If we want all the combinations, use complete

# Add the missing Point/Layer combinations (Number filled with "0"), carry the
# previous Species down into the new rows, then widen by Species.
df1 %>%
  complete(
    Point, Layer,
    fill = list(Number = "0")
  ) %>%
  fill(Species) %>%
  pivot_wider(
    names_from = Species,
    values_from = Number,
    values_fill = list(Number = "0")
  )
# A tibble: 6 x 11
# Point Layer Lari_deci Quer_rope Pinu_sylv Betu_pend Sorb_aucu Acer_pseu Popu_trem Fagu_sylv Pice_abie
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 P03 C 21 17 5 1 0 0 0 0 0
#2 P03 U 0 0 0 0 3 1 0 0 0
#3 P06 C 3 28 28 0 0 0 6 0 0
#4 P06 U 0 0 0 0 0 0 0 0 0
#5 P07 C 0 3 20 1 0 0 0 110 5
#6 P07 U 0 0 0 0 0 0 0 0 0

How do I pivot_wider() so that duplicates are maintained in their own columns?

Create a sequence column and it should work

library(dplyr)
library(tidyr)
library(data.table)
library(stringr)
# Build the target column name from a per-box sequence number so that repeated
# boxes spread into suprise_1, suprise_2, ... (prefix spelling kept as in the
# output shown below).
tibble(long_box, surprise) %>%
  mutate(nm1 = str_c("suprise_", rowid(long_box))) %>%
  pivot_wider(
    names_from = nm1,
    values_from = surprise
  )

-output

# A tibble: 3 x 4
long_box suprise_1 suprise_2 suprise_3
<chr> <chr> <chr> <chr>
1 A apple orange orange
2 B apple banana insects
3 C apple insects <NA>

Pivot_wider function (tidyr r package) from multiple variables

We can create a sequence column

library(dplyr)
library(tidyr)
library(data.table)
# Drop `ID`, number the repeats of each Country/Season_ID pair, and widen
# Region_UN by Gender using that sequence number as part of the row key.
df %>%
  mutate(
    ID = NULL,
    rn = rowid(Country, Season_ID)
  ) %>%
  pivot_wider(
    names_from = Gender,
    values_from = Region_UN,
    id_cols = c(rn, Country, Season_ID)
  )


Related Topics



Leave a reply



Submit