Expand Data Frame

expand a data frame in R

Many options are available to get the desired result, but the OP seems keen on using tidyr::expand. One solution is:

library(dplyr)
library(tidyr)

# For every (Var1, Var2) group, generate one row per ID value (1 and 2),
# then order the rows by ID.
df %>%
  group_by(Var1, Var2) %>%
  expand(ID = seq_len(2)) %>%
  arrange(ID)

# # A tibble: 8 x 3
# # Groups: Var1, Var2 [4]
#   Var1  Var2     ID
#   <chr> <chr> <int>
# 1 a     a         1
# 2 a     b         1
# 3 b     a         1
# 4 b     b         1
# 5 a     a         2
# 6 a     b         2
# 7 b     a         2
# 8 b     b         2

Data:

# Sample input. The header names two columns while every data line carries
# three fields, so read.table() takes the first field as the row names.
df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text =
"Var1                  Var2
  1                        a                     a
  2                        a                     b
  3                        b                     a
  4                        b                     b"
)

How to expand data frame based on values?

One "non-tidyverse" way:

# Sample input: each row carries an integer range [from, to] that is expanded
# into one output row per integer in that range.
xdf <- data.frame(
  x = c("a", "b", "c"),
  y = c(4, 5, 6),
  from = c(1, 2, 3),
  to = c(2, 4, 6),
  stringsAsFactors = FALSE
)

# Build one small data frame per input row (x and y are recycled along the
# from:to sequence stored in z), then stack the pieces with rbind.
# seq_len() is used instead of 1:nrow(xdf) so that a zero-row input produces
# no iterations rather than iterating over c(1, 0).
do.call(rbind.data.frame, lapply(seq_len(nrow(xdf)), function(i) {
  data.frame(
    x = xdf$x[i],
    y = xdf$y[i],
    z = xdf$from[i]:xdf$to[i],
    stringsAsFactors = FALSE
  )
}))

One "tidyverse" way:

library(tidyverse)

# Same sample input as a tibble. tibble() is used because data_frame() is
# deprecated in the tibble package.
xdf <- tibble(
  x = c("a", "b", "c"),
  y = c(4, 5, 6),
  from = c(1, 2, 3),
  to = c(2, 4, 6)
)

# Handle one row at a time: inside do(), `.` is the current row, so
# .$from:.$to is that row's integer range and x / y are recycled along it.
xdf %>%
  rowwise() %>%
  do(tibble(x = .$x, y = .$y, z = .$from:.$to))

Another "tidyverse" way that has not been benchmarked below:

# Row by row, cross-join the current row with its own from:to sequence
# (merge() with by = NULL forms the Cartesian product), then drop the
# range bounds.
xdf %>%
  rowwise() %>%
  do(merge(as_tibble(.), tibble(z = .$from:.$to), by = NULL)) %>%
  select(-from, -to) # Omit this line if you want to keep all original columns.

Since you asked about performance:

library(microbenchmark)

# Benchmark input as a data.table.
xdt1 <- data.table::data.table(
  x = c("a", "b", "c"),
  y = c(4, 5, 6),
  from = c(1, 2, 3),
  to = c(2, 4, 6)
)

# Benchmark input as a plain data frame (character column kept as character).
xdf1 <- data.frame(
  x = c("a", "b", "c"),
  y = c(4, 5, 6),
  from = c(1, 2, 3),
  to = c(2, 4, 6),
  stringsAsFactors = FALSE
)

data.table operations often modify their input in place, so to keep a level playing field, make a copy of each data frame/table before running the operation.

That time penalty is ~100 nanoseconds on most modern systems.

# Time four ways of expanding each row into one row per integer in
# [from, to]. Every expression starts from its own working copy of the input.
microbenchmark(

  data.table = {
    # `<-` would only bind a second name to the same data.table, and the
    # `:=` below would then add the diff column to xdt1 by reference.
    # copy() gives this expression a table of its own.
    xdt2 <- data.table::copy(xdt1)
    # Number of output rows each input row expands to.
    xdt2[, diff := (to - from) + 1]
    # Repeat every row diff times, then number the repeats from..to per group.
    xdt2 <- xdt2[rep(seq_len(.N), diff)]
    xdt2[, z := seq(from, to), by = .(x, y, from, to)]
    xdt2[, c("x", "y", "z")]
  },

  base = {
    xdf2 <- xdf1
    # seq_len() rather than 1:nrow() so a zero-row input yields no iterations.
    do.call(rbind.data.frame, lapply(seq_len(nrow(xdf2)), function(i) {
      data.frame(
        x = xdf2$x[i],
        y = xdf2$y[i],
        z = xdf2$from[i]:xdf2$to[i],
        stringsAsFactors = FALSE
      )
    }))
  },

  tidyverse = {
    xdf2 <- xdf1
    # tibble() replaces the deprecated data_frame().
    dplyr::rowwise(xdf2) %>%
      dplyr::do(dplyr::tibble(x = .$x, y = .$y, z = .$from:.$to))
  },

  plyr = {
    xdf2 <- xdf1
    # mdply() calls the function once per row, passing columns as arguments;
    # only the x, y and z columns of the result are kept.
    plyr::mdply(
      xdf2,
      function(x, y, from, to) data.frame(x, y, z = seq(from, to))
    )[c("x", "y", "z")]
  },

  times = 1000

)
## Unit: microseconds
##        expr       min         lq       mean    median         uq        max neval
##  data.table   920.361  1072.9265  1257.2321  1178.832  1280.2660  10628.552  1000
##        base   677.069   761.3145   884.4136   825.472   915.8985   5366.515  1000
##   tidyverse 15926.127 17231.5015 19201.4798 17994.919 20014.4140 166901.570  1000
##        plyr  1938.838  2196.4205  2448.5314  2322.949  2501.5075   5735.255  1000

Expand data.frame by adding new column

One way with lapply:

# For every value of df$x, bind that value (as column `z`) to the whole of
# df2, then stack the per-value pieces into one result.
do.call(
  rbind,
  lapply(df$x, function(value) cbind(z = value, df2))
)
#     z y
#1 1871 1
#2 1871 2
#3 1871 3
#4 1872 1
#5 1872 2
#6 1872 3

lapply iterates over df$x and cbinds the whole df2 to each element of df$x. do.call combines everything together in one data.frame.

Expand a data frame by group

Use tidyr::pivot_wider with the names_glue argument as follows.

Store the names of all variables to be pivoted (even if there are 500 of them) in a vector, say cols.
Then pass values_from = all_of(cols) as an argument to pivot_wider.

# Names of the columns whose values are spread into the new wide columns.
cols <- c("X1", "X2", "X5")

# One row per grp; each new column is named "<X value>-<source column>".
df %>%
  pivot_wider(
    id_cols = grp,
    names_from = X,
    values_from = all_of(cols),
    names_glue = "{X}-{.value}"
  )

# A tibble: 2 x 10
  grp        `1-X1` `2-X1` `5-X1` `1-X2` `2-X2` `5-X2` `1-X5` `2-X5` `5-X5`
  <chr>       <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>
1 2020_01_19     23     13     23     47     45     41      3     54     21
2 2020_01_20     65     39     43     32     52     76     19     12     90

If you want to use all columns except the first two, use this:

# Same pivot, but take the values from every column other than grp and X.
df %>%
  pivot_wider(
    id_cols = grp,
    names_from = X,
    values_from = !c(grp, X),
    names_glue = "{X}-{.value}"
  )

# A tibble: 2 x 10
  grp        `1-X1` `2-X1` `5-X1` `1-X2` `2-X2` `5-X2` `1-X5` `2-X5` `5-X5`
  <chr>       <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>
1 2020_01_19     23     13     23     47     45     41      3     54     21
2 2020_01_20     65     39     43     32     52     76     19     12     90

However, if you want to rearrange the columns as shown in the expected outcome, you may use names_vary = 'slowest' in the pivot_wider function, available from tidyr 1.2.0.

Expand data frame and add a new variable

With tidyr you can use expand - this will expand your data frame to all combinations of values with your sequence of 1 to 3:

library(tidyverse)

# For every existing (Location, year, group1, Value) combination, generate
# one row per group2 value 1, 2, 3.
df %>%
  group_by(Location, year, group1, Value) %>%
  expand(group2 = seq_len(3))

Output

   Location  year group1 Value group2
   <fct>    <dbl>  <int> <fct>  <int>
 1 a         2020      1 x          1
 2 a         2020      1 x          2
 3 a         2020      1 x          3
 4 a         2020      2 y          1
 5 a         2020      2 y          2
 6 a         2020      2 y          3
 ...

Your approach looks close, and I suppose you could just add on group2 like this:

# Repeat every row of df three times in place, then attach group2, whose
# values 1, 2, 3 are recycled down the repeated rows.
cbind(
  df[rep(seq_len(nrow(df)), each = 3), ],
  group2 = seq_len(3)
)

Expand the Dataframe - Adding Rows not Columns

You can use tidyr::expand_grid()

library(tidyr)

# Pair every element of fs$KRS with each of the values 1 to 6.
expand_grid(
  KRS = fs$KRS,
  V2 = seq_len(6)
)

tidyr::expand():

# Same expansion written as a direct call on fs instead of a pipe.
tidyr::expand(fs, KRS, V2 = seq_len(6))

Or even expand.grid() from base R

# Base R equivalent. Var1 / Var2 are the names expand.grid() assigns to
# unnamed arguments, spelled out here for clarity.
expand.grid(Var1 = fs$KRS, Var2 = seq_len(6))

Expand and then fill a dataframe

Check out the fill() function from tidyr (part of the tidyverse).

Using your example, but introducing the NAs you mention, df5 should be what you're looking for here.

library( tidyverse )
# Three per-id price series; each data frame has numeric columns
# id, year and price.
year <- c(2014, 2019, 2021)
price <- c(100, 110, 120)
df1 <- data.frame(id = 1, year = year, price = price)

year <- c(2016, 2019, 2021)
price <- c(200, 210, 220)
df2 <- data.frame(id = 2, year = year, price = price)

year <- c(2014, 2015, 2019, 2020)
price <- c(300, 310, 320, 330)
df3 <- data.frame(id = 3, year = year, price = price)

# Keep the three series together in one list.
list1 <- list(df1, df2, df3)

# Full id x year grid (3 ids, years 2014-2021) with NA where no price is known.
id <- rep(c(1, 2, 3), each = 8)
year <- rep(seq(2014, 2021), times = 3)
price <- c(
  100, NA, NA, NA, NA, 110, NA, 120,
  NA, NA, 200, NA, NA, 210, 210, 220,
  300, 310, 310, 310, 310, 320, 330, 330
)
df4 <- data.frame(id = id, year = year, price = price)
# Within each id, carry the last known price forward and then fill any
# remaining leading NAs backward ("downup"). ungroup() drops the grouping
# afterwards so later operations on df5 are not silently applied per id.
df5 <- df4 %>%
  group_by(id) %>%
  fill(price, .direction = "downup") %>%
  ungroup()