R - pivoting duplicate rows into multiple column with unknown number of columns
With the packages dplyr
and tidyr
you can use
library(dplyr)
library(tidyr)
# Number the rows within each company, then spread those numbers into
# sic.1, sic.2, ... columns (one row per company).
df %>%
  group_by(companies) %>%
  mutate(row_n = row_number()) %>%
  pivot_wider(
    # Named explicitly: passing id_cols by position is deprecated since
    # tidyr 1.3.0, where `...` was inserted right after `data`.
    id_cols = companies,
    names_from = row_n,
    values_from = sic,
    names_glue = "sic.{row_n}"
  )
Output
# A tibble: 2 x 4
# Groups: companies [2]
# companies sic.1 sic.2 sic.3
# <chr> <chr> <chr> <chr>
# 1 ABC Ltd 12345 24155 31231
# 2 Derwent plc 55346 34234 NA
Show duplicate value on a separate row in pivot wider
You can use -
library(dplyr)
library(tidyr)
# Gather the values that share an id into list-columns, then expand the
# CP1/CP2 lists so every duplicate ends up on its own row.
df %>%
  pivot_wider(
    names_from = id,
    values_from = value,
    values_fn = list
  ) %>%
  unnest(cols = c(CP1, CP2))
# identifier label CP1 CP2
# <chr> <chr> <int> <int>
#1 e1 Monaco 0 1
#2 e1 became 0 0
#3 e2 the 1 1
#4 e2 first 0 0
#5 e1 the 10 NA
#6 e1 the 1 NA
You were close with your attempt as well; you only needed to include id in group_by:
# Grouping by id as well makes row_number() restart for every
# identifier/label/id combination, so rn tells the duplicates apart.
df %>%
  group_by(identifier, label, id) %>%
  mutate(rn = row_number()) %>%
  pivot_wider(names_from = id, values_from = value)
Pivoting data.frame with multiple columns per column
The data is all mixed up, so we need a rule that identifies which group each value belongs to. Here I have treated values containing only letters as name and values containing only digits as year; you can add more such conditions if needed.
library(dplyr)
library(tidyr)
# Stack every column, tag each value by what it contains, then spread the
# tags back out into one column per tag.
df %>%
  pivot_longer(cols = everything(), names_to = "col") %>%
  mutate(
    col_name = case_when(
      grepl("^[A-Za-z]+$", value) ~ "name", # letters only
      grepl("^[0-9]+$", value) ~ "year"     # digits only
    )
  ) %>%
  pivot_wider(names_from = col_name, values_from = value) %>%
  type.convert(as.is = TRUE)
# A tibble: 4 x 3
# col name year
# <int> <chr> <int>
#1 1 bob 2011
#2 2 sally 2012
#3 3 fred 2013
#4 4 jim 2014
Pivot data into two different columns simultaneously using pivot_longer() in R?
Edit
Turns out, you can do it in one pivot_longer
:
# ".value" turns the part of each column name after the dot into an output
# column, while the part before the dot is stored in `variable`.
df %>%
  pivot_longer(
    cols = -id,
    names_to = c("variable", ".value"),
    names_pattern = "(.*)\\.(.*)"
  ) %>%
  rename(activation = act, fixation = fix)
with the same result.
Don't know how to do it in one go, but you could use
library(tidyr)
library(dplyr)
# Two-step variant: split the column names into `variable` and `class`,
# then widen `class` back out (values_from defaults to the `value` column).
df %>%
  pivot_longer(
    cols = -id,
    names_to = c("variable", "class"),
    names_pattern = "(.*)\\.(.*)"
  ) %>%
  pivot_wider(names_from = "class") %>%
  rename(activation = act, fixation = fix)
This returns
# A tibble: 4 x 4
id variable activation fixation
<dbl> <chr> <dbl> <dbl>
1 1 v1 0.4 1
2 1 v2 0.5 0
3 2 v1 0.8 0
4 2 v2 0.7 1
Reshaping data to wide format in R
Create a row number column for each id
and reshape the data to wide format.
library(dplyr)
library(tidyr)
# `col` is the row index within each id; it becomes the suffix of every
# widened column (x_1, x_2, ..., stop_10).
df %>%
  group_by(id) %>%
  mutate(col = row_number()) %>%
  ungroup() %>%
  pivot_wider(names_from = col, values_from = x:stop)
# A tibble: 10 x 41
# id x_1 x_2 x_3 x_4 x_5 x_6 x_7 x_8 x_9 x_10
# <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 1 A B C D E F G H I J
# 2 2 A B C D E F G H I J
# 3 3 A B C D E F G H I J
# 4 4 A B C D E F G H I J
# 5 5 A B C D E F G H I J
# 6 6 A B C D E F G H I J
# 7 7 A B C D E F G H I J
# 8 8 A B C D E F G H I J
# 9 9 A B C D E F G H I J
#10 10 A B C D E F G H I J
# … with 30 more variables: y_1 <chr>, y_2 <chr>, y_3 <chr>,
# y_4 <chr>, y_5 <chr>, y_6 <chr>, y_7 <chr>, y_8 <chr>, y_9 <chr>,
# y_10 <chr>, start_1 <date>, start_2 <date>, start_3 <date>,
# start_4 <date>, start_5 <date>, start_6 <date>, start_7 <date>,
# start_8 <date>, start_9 <date>, start_10 <date>, stop_1 <date>,
# stop_2 <date>, stop_3 <date>, stop_4 <date>, stop_5 <date>,
# stop_6 <date>, stop_7 <date>, stop_8 <date>, stop_9 <date>,
# stop_10 <date>
Pivoting wide to long format and then nesting columns
A tidyverse approach to achieve your desired result may look like so:
library(tibble)
# Example input: one row per key. Keys with levels (gender, color) carry
# their values in the `values.<level>` columns; keys without levels
# (age, time_of_day) use the plain `value` column. Unused cells are NA.
df_1 <-
tribble(~key, ~values.male, ~values.female, ~values.red, ~values.green, ~value,
"gender", 0.5, 0.5, NA, NA, NA,
"age", NA, NA, NA, NA, "50",
"color", NA, NA, TRUE, FALSE, NA,
"time_of_day", NA, NA, NA, NA, "noon")
library(tidyr)
library(dplyr)
library(purrr)
# Convert everything to character so values of different types can share one
# column, go long (dropping NAs), and nest the level/value pairs per key.
df_pivoted <- df_1 %>%
  mutate(across(everything(), as.character)) %>%
  pivot_longer(
    cols = -key,
    names_to = "level",
    names_prefix = "^values\\.",
    values_drop_na = TRUE
  ) %>%
  group_by(key) %>%
  nest() %>%
  mutate(
    data = map(data, function(tbl) {
      # Keys whose only level is the bare "value" column collapse to a vector.
      if (all(tbl$level == "value")) deframe(tbl) else tbl
    })
  )
df_pivoted
#> # A tibble: 4 x 2
#> # Groups: key [4]
#> key data
#> <chr> <list>
#> 1 gender <tibble [2 × 2]>
#> 2 age <chr [1]>
#> 3 color <tibble [2 × 2]>
#> 4 time_of_day <chr [1]>
EDIT Following the clarification in your comments on the desired result, we can simply drop the map statement at the end (which was only meant to convert the tibbles for categories without levels into vectors) and instead add a mutate statement before nesting that replaces the level with NA for categories without a level:
# Reshape a key/value table into one nested tibble of (level, value) pairs
# per key.
#
# x: a data frame with a `key` column plus value columns. A leading
#    "values." in a column name is stripped to obtain the level.
# Returns a tibble grouped by `key` with a list-column `data`. Keys whose
# only populated column is the bare `value` get an NA level.
pivot_nest <- function(x) {
  mutate(x, across(everything(), as.character)) %>%
    pivot_longer(-key, names_to = "level", names_prefix = "^values\\.", values_drop_na = TRUE) %>%
    group_by(key) %>%
    # The test is a scalar, so use if/else. ifelse() returns a result as long
    # as its test (length 1), which would overwrite every level in the group
    # with the first one (e.g. male/male instead of male/female).
    mutate(level = if (all(level == "value")) NA_character_ else level) %>%
    nest()
}
df_pivoted <- df_1 %>%
  pivot_nest()
df_pivoted
#> # A tibble: 4 x 2
#> # Groups: key [4]
#> key data
#> <chr> <list>
#> 1 gender <tibble [2 × 2]>
#> 2 age <tibble [1 × 2]>
#> 3 color <tibble [2 × 2]>
#> 4 time_of_day <tibble [1 × 2]>
df_pivoted$data
#> [[1]]
#> # A tibble: 2 x 2
#> level value
#> <chr> <chr>
#> 1 male 0.5
#> 2 female 0.5
#>
#> [[2]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> 50
#>
#> [[3]]
#> # A tibble: 2 x 2
#> level value
#> <chr> <chr>
#> 1 red TRUE
#> 2 green FALSE
#>
#> [[4]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> noon
# A table with no levelled keys at all: only the plain `value` column.
df_2 <- tribble(
  ~key, ~value,
  "age", "50",
  "income", "100000",
  "time_of_day", "noon"
)
df_pivoted2 <- pivot_nest(df_2)
df_pivoted2
#> # A tibble: 3 x 2
#> # Groups: key [3]
#> key data
#> <chr> <list>
#> 1 age <tibble [1 × 2]>
#> 2 income <tibble [1 × 2]>
#> 3 time_of_day <tibble [1 × 2]>
df_pivoted2$data
#> [[1]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> 50
#>
#> [[2]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> 100000
#>
#> [[3]]
#> # A tibble: 1 x 2
#> level value
#> <chr> <chr>
#> 1 <NA> noon
Why does pivot_wider either read single values as duplicates or create a wide-and-long tibble (without merging rows)?
We could create a sequence column with rowid
library(dplyr)
library(tidyr)
library(data.table)
# rowid() numbers repeated Point/Species pairs; with rn among the remaining
# columns, duplicates land on separate rows instead of colliding in one cell.
df1 %>%
  mutate(rn = rowid(Point, Species)) %>%
  pivot_wider(
    names_from = Species,
    values_from = Number,
    values_fill = list(Number = "0")
  )
If we want all the combinations, use complete
# complete() adds the missing Point/Layer combinations (Number set to "0"),
# and fill() carries the previous Species down into those new rows.
df1 %>%
  complete(Point, Layer, fill = list(Number = "0")) %>%
  fill(Species) %>%
  pivot_wider(
    names_from = Species,
    values_from = Number,
    values_fill = list(Number = "0")
  )
# A tibble: 6 x 11
# Point Layer Lari_deci Quer_rope Pinu_sylv Betu_pend Sorb_aucu Acer_pseu Popu_trem Fagu_sylv Pice_abie
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 P03 C 21 17 5 1 0 0 0 0 0
#2 P03 U 0 0 0 0 3 1 0 0 0
#3 P06 C 3 28 28 0 0 0 6 0 0
#4 P06 U 0 0 0 0 0 0 0 0 0
#5 P07 C 0 3 20 1 0 0 0 110 5
#6 P07 U 0 0 0 0 0 0 0 0 0
How do I pivot_wider() so that duplicates are maintained in their own columns?
Create a sequence column and it should work
library(dplyr)
library(tidyr)
library(data.table)
library(stringr)
# Build each row's target column name from the running count of its
# long_box value, then widen on that name. The "suprise_" spelling is kept
# because the output shown uses it.
tibble(long_box, surprise) %>%
  mutate(nm1 = str_c("suprise_", rowid(long_box))) %>%
  pivot_wider(names_from = nm1, values_from = surprise)
-output
# A tibble: 3 x 4
long_box suprise_1 suprise_2 suprise_3
<chr> <chr> <chr> <chr>
1 A apple orange orange
2 B apple banana insects
3 C apple insects <NA>
Pivot_wider function (tidyr r package) from multiple variables
We can create a sequence column
library(dplyr)
library(tidyr)
library(data.table)
# Drop ID and number the rows within each Country/Season_ID pair; rn then
# joins the id columns so repeated pairs are kept as separate rows.
df %>%
  mutate(ID = NULL, rn = rowid(Country, Season_ID)) %>%
  pivot_wider(
    id_cols = c(rn, Country, Season_ID),
    names_from = Gender,
    values_from = Region_UN
  )
Related Topics
Usage of Dot/Period in R Functions
R: Need Finite 'Ylim' Values in Function
Reshape R Data with User Entries in Rows, Collapsing for Each User
How to Format the X-Axis of the Hard Coded Plotting Function of Spei Package in R
Reconstruct a Categorical Variable from Dummies in R
Tls V1.1/Tls V1.2 Support in Rcurl
Adding Grouped Mean Values to Column in Data Frame
Terms of a Sum in a R Expression
R Convert String Date (E.G. "October 1, 2014") to Date Format
Shiny Ui.R - Error in Tag("Div", List(...)) - Not Sure Where Error Is
Creating "Word" Cloud of Phrases, Not Individual Words in R
R Histogram from Frequency Table
Sum Columns Row-Wise with Similar Names
How to Draw Roc Curve Using Value of Confusion Matrix
Reshape Data for Values in One Column
R Shiny - Checkboxes and Action Button Combination Issue
Subsetting a Data Frame to the Rows Not Appearing in Another Data Frame