R - Pivoting Duplicate Rows into Multiple Column with Unknown Number of Columns

R - pivoting duplicate rows into multiple column with unknown number of columns

With the packages dplyr and tidyr you can use

library(dplyr)
library(tidyr)

# Number the rows within each company so that repeated companies spread out
# into sic.1, sic.2, ... regardless of how many duplicates there are.
df %>%
  group_by(companies) %>%
  mutate(row_n = row_number()) %>%
  pivot_wider(
    # `id_cols` must be passed by name: in current tidyr the `...` argument
    # comes right after `data`, so an unnamed `companies` is not matched to it.
    id_cols = companies,
    names_from = row_n,
    values_from = sic,
    names_glue = "sic.{row_n}"
  )

Output

# A tibble: 2 x 4
# Groups:   companies [2]
#   companies   sic.1 sic.2 sic.3
#   <chr>       <chr> <chr> <chr>
# 1 ABC Ltd     12345 24155 31231
# 2 Derwent plc 55346 34234 NA

Show duplicate value on a separate row in pivot wider

You can use -

library(dplyr)
library(tidyr)

# Gather duplicated values into list-columns first, then expand the
# list-columns back into one row per element.
df %>%
  pivot_wider(
    names_from = id,
    values_from = value,
    values_fn = list
  ) %>%
  unnest(cols = c(CP1, CP2))

#  identifier label    CP1   CP2
#  <chr>      <chr>  <int> <int>
#1 e1         Monaco     0     1
#2 e1         became     0     0
#3 e2         the        1     1
#4 e2         first      0     0
#5 e1         the       10    NA
#6 e1         the        1    NA

You were close with your attempt as well; you just had to include `id` in `group_by()`:

# `rn` numbers the repeats inside each identifier/label/id combination, so
# duplicated values end up on separate rows after widening.
df %>%
  group_by(identifier, label, id) %>%
  mutate(rn = row_number()) %>%
  pivot_wider(
    names_from = id,
    values_from = value
  )

Pivoting data.frame with multiple columns per column

The data is all mixed up. If we have more such values we need to specify a way in which we can identify each group.

Here I have treated values consisting only of letters as `name` and values consisting only of digits as `year`; you can add more such conditions if needed.

library(dplyr)
library(tidyr)

# Stack every column, label each value by its content (letters only -> name,
# digits only -> year), then spread the labels back out into columns.
df %>%
  pivot_longer(cols = everything(), names_to = "col") %>%
  mutate(
    col_name = case_when(
      grepl("^[A-Za-z]+$", value) ~ "name",
      grepl("^[0-9]+$", value) ~ "year"
    )
  ) %>%
  pivot_wider(names_from = col_name, values_from = value) %>%
  type.convert(as.is = TRUE)

# A tibble: 4 x 3
#    col name   year
#  <int> <chr> <int>
#1     1 bob    2011
#2     2 sally  2012
#3     3 fred   2013
#4     4 jim    2014

Pivot data into two different columns simultaneously using pivot_longer() in R?

Edit

Turns out, you can do it in one pivot_longer:

# The ".value" sentinel makes the second regex group (act / fix) become the
# output column names, so no follow-up pivot_wider() is needed.
df %>%
  pivot_longer(
    cols = -id,
    names_to = c("variable", ".value"),
    names_pattern = "(.*)\\.(.*)"
  ) %>%
  rename(
    activation = act,
    fixation = fix
  )

This gives the same result as the two-step approach shown below.

Original answer: I don't know how to do it in one go, but you could use

library(tidyr)
library(dplyr)

# Step 1: split "v1.act"-style names into `variable` and `class`.
# Step 2: spread `class` back out so act / fix become their own columns.
df %>%
  pivot_longer(
    cols = -id,
    names_to = c("variable", "class"),
    names_pattern = "(.*)\\.(.*)"
  ) %>%
  pivot_wider(names_from = class, values_from = value) %>%
  rename(
    activation = act,
    fixation = fix
  )

This returns

# A tibble: 4 x 4
     id variable activation fixation
  <dbl> <chr>         <dbl>    <dbl>
1     1 v1              0.4        1
2     1 v2              0.5        0
3     2 v1              0.8        0
4     2 v2              0.7        1

Reshaping data to wide format in R

Create a row number column for each id and reshape the data to wide format.

library(dplyr)
library(tidyr)

# `col` is the within-id row index; it becomes the suffix of every widened
# column (x_1, x_2, ..., stop_10).
df %>%
  group_by(id) %>%
  mutate(col = row_number()) %>%
  ungroup() %>%
  pivot_wider(
    names_from = col,
    values_from = x:stop
  )

# A tibble: 10 x 41
#      id x_1   x_2   x_3   x_4   x_5   x_6   x_7   x_8   x_9   x_10 
#   <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1     1 A     B     C     D     E     F     G     H     I     J    
# 2     2 A     B     C     D     E     F     G     H     I     J    
# 3     3 A     B     C     D     E     F     G     H     I     J    
# 4     4 A     B     C     D     E     F     G     H     I     J    
# 5     5 A     B     C     D     E     F     G     H     I     J    
# 6     6 A     B     C     D     E     F     G     H     I     J    
# 7     7 A     B     C     D     E     F     G     H     I     J    
# 8     8 A     B     C     D     E     F     G     H     I     J    
# 9     9 A     B     C     D     E     F     G     H     I     J    
#10    10 A     B     C     D     E     F     G     H     I     J    
# … with 30 more variables: y_1 <chr>, y_2 <chr>, y_3 <chr>,
#   y_4 <chr>, y_5 <chr>, y_6 <chr>, y_7 <chr>, y_8 <chr>, y_9 <chr>,
#   y_10 <chr>, start_1 <date>, start_2 <date>, start_3 <date>,
#   start_4 <date>, start_5 <date>, start_6 <date>, start_7 <date>,
#   start_8 <date>, start_9 <date>, start_10 <date>, stop_1 <date>,
#   stop_2 <date>, stop_3 <date>, stop_4 <date>, stop_5 <date>,
#   stop_6 <date>, stop_7 <date>, stop_8 <date>, stop_9 <date>,
#   stop_10 <date>

Pivoting wide to long format and then nesting columns

A tidyverse approach to achieve your desired result may look like so:

library(tibble)

# Example input: one row per key; keys with levels use the `values.*` columns,
# keys without levels use the plain `value` column.
df_1 <- tibble(
  key = c("gender", "age", "color", "time_of_day"),
  values.male = c(0.5, NA, NA, NA),
  values.female = c(0.5, NA, NA, NA),
  values.red = c(NA, NA, TRUE, NA),
  values.green = c(NA, NA, FALSE, NA),
  value = c(NA, "50", NA, "noon")
)

library(tidyr)
library(dplyr)
library(purrr)

# Convert everything to character so the mixed-type columns can share one
# `value` column, pivot to long form, and nest per key. Keys whose only level
# is the plain "value" column are collapsed to a named vector via deframe().
df_pivoted <- df_1 %>%
  mutate(across(everything(), as.character)) %>%
  pivot_longer(
    cols = -key,
    names_to = "level",
    names_prefix = "^values\\.",
    values_drop_na = TRUE
  ) %>%
  group_by(key) %>%
  nest() %>%
  mutate(
    data = map(data, function(grp) {
      if (all(grp$level == "value")) deframe(grp) else grp
    })
  )
df_pivoted
#> # A tibble: 4 x 2
#> # Groups:   key [4]
#>   key         data            
#>   <chr>       <list>          
#> 1 gender      <tibble [2 × 2]>
#> 2 age         <chr [1]>       
#> 3 color       <tibble [2 × 2]>
#> 4 time_of_day <chr [1]>

EDIT: Following the clarification in your comments on the desired result, we could simply get rid of the map statement at the end (which was basically meant for converting the tibbles for categories without levels to a vector) and add a mutate statement before nesting to replace the level with NA for categories without a level:

#' Pivot the `values.*` / `value` columns to long format and nest them by key.
#'
#' @param x A data frame with a `key` column. Every other column is converted
#'   to character and pivoted into `level` / `value` pairs; a leading
#'   "values." is stripped from the column names to form `level`.
#' @return A tibble grouped by `key` with a list-column `data`, holding one
#'   `level` / `value` tibble per key. For keys whose only level is the plain
#'   `value` column, `level` is set to `NA`.
pivot_nest <- function(x) {
  mutate(x, across(everything(), as.character)) %>%
    pivot_longer(
      -key,
      names_to = "level",
      names_prefix = "^values\\.",
      values_drop_na = TRUE
    ) %>%
    group_by(key) %>%
    # Scalar if/else on purpose: ifelse() with a length-1 test returns only the
    # first element of `level`, which would then be recycled over the group.
    mutate(level = if (all(level == "value")) NA_character_ else level) %>%
    nest()
}

df_pivoted <- df_1 %>% 
  pivot_nest()
df_pivoted
#> # A tibble: 4 x 2
#> # Groups:   key [4]
#>   key         data            
#>   <chr>       <list>          
#> 1 gender      <tibble [2 × 2]>
#> 2 age         <tibble [1 × 2]>
#> 3 color       <tibble [2 × 2]>
#> 4 time_of_day <tibble [1 × 2]>
df_pivoted$data
#> [[1]]
#> # A tibble: 2 x 2
#>   level  value
#>   <chr>  <chr>
#> 1 male   0.5  
#> 2 female 0.5  
#> 
#> [[2]]
#> # A tibble: 1 x 2
#>   level value
#>   <chr> <chr>
#> 1 <NA>  50   
#> 
#> [[3]]
#> # A tibble: 2 x 2
#>   level value
#>   <chr> <chr>
#> 1 red   TRUE 
#> 2 green FALSE
#> 
#> [[4]]
#> # A tibble: 1 x 2
#>   level value
#>   <chr> <chr>
#> 1 <NA>  noon

# Second example: no key has levels, so every `level` ends up as NA.
df_2 <- tribble(
  ~key,          ~value,
  "age",         "50",
  "income",      "100000",
  "time_of_day", "noon"
)

df_pivoted2 <- pivot_nest(df_2)
df_pivoted2
#> # A tibble: 3 x 2
#> # Groups:   key [3]
#>   key         data            
#>   <chr>       <list>          
#> 1 age         <tibble [1 × 2]>
#> 2 income      <tibble [1 × 2]>
#> 3 time_of_day <tibble [1 × 2]>
df_pivoted2$data
#> [[1]]
#> # A tibble: 1 x 2
#>   level value
#>   <chr> <chr>
#> 1 <NA>  50   
#> 
#> [[2]]
#> # A tibble: 1 x 2
#>   level value 
#>   <chr> <chr> 
#> 1 <NA>  100000
#> 
#> [[3]]
#> # A tibble: 1 x 2
#>   level value
#>   <chr> <chr>
#> 1 <NA>  noon

Why does pivot_wider either read single values as duplicates or create a wide-and-long tibble (without merging rows)?

We could create a sequence column with rowid

library(dplyr)
library(tidyr)
library(data.table)
# `rn` (data.table::rowid) numbers repeated Point/Species pairs so that each
# row is uniquely identified before widening.
df1 %>%
  mutate(rn = rowid(Point, Species)) %>%
  pivot_wider(
    names_from = Species,
    values_from = Number,
    values_fill = list(Number = "0")
  )

If we want all the combinations, use complete

# Add the missing Point/Layer combinations (Number = "0"), carry the previous
# Species down into the newly created rows, then widen.
df1 %>%
  complete(Point, Layer, fill = list(Number = "0")) %>%
  fill(Species) %>%
  pivot_wider(
    names_from = Species,
    values_from = Number,
    values_fill = list(Number = "0")
  )
# A tibble: 6 x 11
#  Point Layer Lari_deci Quer_rope Pinu_sylv Betu_pend Sorb_aucu Acer_pseu Popu_trem Fagu_sylv Pice_abie
#  <chr> <chr> <chr>     <chr>     <chr>     <chr>     <chr>     <chr>     <chr>     <chr>     <chr>    
#1 P03   C     21        17        5         1         0         0         0         0         0        
#2 P03   U     0         0         0         0         3         1         0         0         0        
#3 P06   C     3         28        28        0         0         0         6         0         0        
#4 P06   U     0         0         0         0         0         0         0         0         0        
#5 P07   C     0         3         20        1         0         0         0         110       5        
#6 P07   U     0         0         0         0         0         0         0         0         0

How do I pivot_wider() so that duplicates are maintained in their own columns?

Create a sequence column and it should work

library(dplyr)
library(tidyr)
library(data.table)
library(stringr)
# Build the target column name from the within-box sequence number.
# NOTE: the prefix is spelled "suprise_" in the original answer; it is kept
# as-is so the column names match the printed output.
tibble(long_box, surprise) %>%
  mutate(nm1 = str_c("suprise_", rowid(long_box))) %>%
  pivot_wider(
    names_from = nm1,
    values_from = surprise
  )

-output

# A tibble: 3 x 4
  long_box suprise_1 suprise_2 suprise_3
  <chr>    <chr>     <chr>     <chr>    
1 A        apple     orange    orange   
2 B        apple     banana    insects  
3 C        apple     insects   <NA>

Pivot_wider function (tidyr r package) from multiple variables

We can create a sequence column

library(dplyr)
library(tidyr)
library(data.table)
# Drop ID and add a sequence number per Country/Season_ID so that repeated
# combinations stay on separate rows when Gender is spread into columns.
df %>%
  mutate(ID = NULL, rn = rowid(Country, Season_ID)) %>%
  pivot_wider(
    id_cols = c(rn, Country, Season_ID),
    names_from = Gender,
    values_from = Region_UN
  )

R - Pivoting Duplicate Rows into Multiple Column with Unknown Number of Columns