R: Fill Down Multiple Columns

R: fill down multiple columns

We can use fill_() when selecting variables by name, i.e. when the column names are supplied as a character vector.

# Fill every column of df downward, passing the columns to fill as a character
# vector of names.
# NOTE(review): fill_() is the standard-evaluation variant from this old tidyr
# version; it is believed to be deprecated in later releases in favour of
# fill() with a selection helper - confirm against the installed tidyr.
library(tidyr)# using tidyr_0.4.1.9000
res <- fill_(df, names(df))
head(res)
#   col1 col2 col3
#1    1   NA    b
#2    1    3    b
#3    2    4    a
#4    2    4    a
#5    2    1    a
#6    3    4    a

Another option would be

# everything() selects all columns, so every column of df is filled.
fill(df, everything())

However, if we use fill with names(df), it will give the same error as the OP showed

# Counter-example: in this tidyr version fill() does not accept an expression
# that evaluates to a column name, so the call fails with the error below.
fill(df, names(df)[1])
#Error: All select() inputs must resolve to integer column positions.
#The following do not:
#*  names(df)[1]

data

# Reproducible example data: three columns of 20 values, each drawn with
# replacement from a small pool that includes NA.
set.seed(24)
df <- data.frame(
  col1 = sample(c(NA, 1:3), size = 20, replace = TRUE),
  col2 = sample(c(NA, 1:5), size = 20, replace = TRUE),
  col3 = sample(c(NA, letters[1:5]), size = 20, replace = TRUE),
  stringsAsFactors = FALSE
)

Fill multiple columns in a R dataframe

You can expand multiple columns in complete(). For example, if you need data up to week 8, you can do:

# Expand to every combination of isoweek 1-8 and group; combinations that are
# absent from `flu` get n = 0 instead of NA.
tidyr::complete(
  flu,
  isoweek = 1:8,
  group,
  fill = list(n = 0)
)

# A tibble: 16 x 3
#   isoweek group     n
#     <dbl> <chr> <dbl>
# 1       1 fluA      5
# 2       1 fluB      6
# 3       2 fluA      3
# 4       2 fluB      5
# 5       3 fluA     12
# 6       3 fluB     14
# 7       4 fluA      6
# 8       4 fluB      0
# 9       5 fluA     23
#10       5 fluB     25
#11       6 fluA      0
#12       6 fluB      0
#13       7 fluA      0
#14       7 fluB      0
#15       8 fluA      0
#16       8 fluB      0

Fill out multiple columns in one dataset with data from another dataset

# Example subject table: one row per subject with up to three diagnosis codes.
# A quoted empty string ("") stands for a missing diagnosis.
df <- read.table(
  header = TRUE,
  text = '  sub_id diag_1 diag_2 diag_3
1      1   S019   T028   S021
2      2   S520   ""  ""         
3      3   S320   S270   S324
4      4   S023   ""       S109
5      5   S826   S420   S729
'
)

# Lookup table mapping a diagnosis code (diag_map) to its ais and iss_br
# values. header is spelled TRUE rather than T, because T is an ordinary
# variable that can be reassigned.
df_lookup <- read.table(header = TRUE, text = "  diag_map ais iss_br
1      S019   1      6
2      S020   3      6
3      S021   2      1
4      S025   1      1
5      S109   1      1
6      S110   5      2
7      S270   0      0
8      S320   0      0
9      S420   4      4
10     S520   2      5
11     S729   2      3
12     T028   1      0")

library(tidyverse)

# For each of the diagnosis columns (positions 2-4), look the code up in
# df_lookup$diag_map and add two new columns holding the matching ais and
# iss_br values. Codes with no match in the lookup table yield NA.
df %>%
  mutate(
    across(
      2:4,
      \(codes) df_lookup$ais[match(codes, df_lookup$diag_map)],
      .names = "{.col}_ais"
    ),
    across(
      2:4,
      \(codes) df_lookup$iss_br[match(codes, df_lookup$diag_map)],
      .names = "{.col}_iss_br"
    )
  )
#>   sub_id diag_1 diag_2 diag_3 diag_1_ais diag_2_ais diag_3_ais diag_1_iss_br
#> 1      1   S019   T028   S021          1          1          2             6
#> 2      2   S520                        2         NA         NA             5
#> 3      3   S320   S270   S324          0          0         NA             0
#> 4      4   S023          S109         NA         NA          1            NA
#> 5      5   S826   S420   S729         NA          4          2            NA
#>   diag_2_iss_br diag_3_iss_br
#> 1             0             1
#> 2            NA            NA
#> 3             0            NA
#> 4            NA             1
#> 5             4             3

<sup>Created on 2021-08-12 by the reprex package (v2.0.0)</sup>

Fill down constant value, add 1 when condition is met in dplyr

We can use cumsum with %in% and mutate.

library(dplyr)

# n is the running count of "break" rows seen so far, plus 1, so the value
# steps up by one on every "break" row and stays constant in between.
# %in% is used for the comparison because it never returns NA.
dat2 <- mutate(dat, n = cumsum(state %in% "break") + 1)
dat2
#   step  state n
# 1    1 active 1
# 2    2 active 1
# 3    2  break 2
# 4    1 active 2
# 5    2  error 2
# 6    3 active 2
# 7    3  break 3
# 8    1 active 3

Data

# Example input: a step counter and a state label for each of eight rows.
dat <- read.table(
  header = TRUE,
  text = "step   state   
1      active
2      active
2       break
1      active
2      error
3      active
3       break
1      active"
)

Fill both numerical and character columns, conditional on the previous and next value being equal

Using dplyr:

library(dplyr)
# Conditionally fill NAs in columns 2-4 (areacode, type, region in the printed
# result): an NA cell takes the previous row's value only when the previous
# and next rows agree on both areacode and type. If the neighbours differ, or
# the comparison is NA, the cell keeps its original value.
# type is converted to character first (presumably it is a factor in the
# input - TODO confirm).
dat2 |> 
  mutate(type = as.character(type)) |> 
  mutate(across(2:4,
                ~ ifelse(is.na(.) & lag(areacode) == lead(areacode) & lag(type) == lead(type),
                         lag(.),
                         .)))

  zipcode areacode type region
1    1001        4 clay      3
2    1002        4 clay      3
3    1003       NA <NA>     NA
4    1004        1 sand      3

# Same conditional fill applied to `dat`: an NA cell in columns 2-4 takes the
# previous row's value only when the previous and next rows agree on both
# areacode and type; otherwise the cell is left as it is.
# type is converted to character first (presumably it is a factor in the
# input - TODO confirm).
dat |> 
  mutate(type = as.character(type)) |> 
  mutate(across(2:4,
                ~ ifelse(is.na(.) & lag(areacode) == lead(areacode) & lag(type) == lead(type),
                         lag(.),
                         .)))

  zipcode areacode type region
1    1001        4 clay      3
2    1002        4 clay      3
3    1003        4 clay      3
4    1004        4 clay      3

Pivot wider in R with multiple columns

Try this solution.

Most of the work was creating a separate ID column and then creating unique names for the columns.

library(tidyr)
library(dplyr)
library(vctrs)

# Example input: a two-column key/value layout in which each "ID" row starts a
# new record and the rows that follow it belong to that record.
df <- structure(
  list(
    col1 = c("ID", "animal", "animal", "animal", "shape", "ID", "animal", "shape"),
    col2 = c("55.", "dog", "bear", "rabbit", "circle", "67.", "cat", "square")
  ),
  class = "data.frame",
  row.names = c(NA, -8L)
)

# Flag the ID rows. %in% never yields NA, so the mask is safe for subsetting.
is_id_row <- df$col1 %in% "ID"
idrows <- which(is_id_row)

# Create the ID column: copy each record's ID onto its own "ID" row, then
# carry it down over the rows that follow.
df$ID <- NA
df$ID[idrows] <- df$col2[idrows]
df <- fill(df, ID, .direction = "down")

# Drop the "ID" rows with the logical mask. Negative indexing (df[-idrows, ])
# would return zero rows if there were no ID rows at all.
df <- df[!is_id_row, ]

 # Make col1 unique within each ID so it can supply column names, then pivot
 # wider. vec_as_names() appends a "...<position>" suffix to duplicated names;
 # the "...1" suffix is stripped so the first occurrence keeps its plain name.
 # The pattern is anchored with "$" so that a suffix such as "...10" is left
 # intact instead of being cut down to "0".
df %>%
  group_by(ID) %>%
  mutate(col1 = vec_as_names(col1, repair = "unique")) %>%
  mutate(col1 = stringr::str_replace(col1, "\\.+1$", "")) %>%
  ungroup() %>%
  pivot_wider(id_cols = "ID", names_from = "col1", values_from = "col2")

  ID    animal  animal...2 animal...3 shape
  <chr> <chr>   <chr>      <chr>      <chr> 
1 55.   dog     bear       rabbit     circle
2 67.   cat     NA         NA         square

Other alternatives, based on one of your previous questions:

# Alternative 1: number the repeated col1 values within each ID (animal1,
# animal2, ...) so every name is unique before pivoting.
df %>%
  group_by(ID) %>%
  mutate(col1 = paste0(col1, data.table::rowid(col1))) %>%
  ungroup() %>%
  pivot_wider(id_cols = "ID", names_from = "col1", values_from = "col2")

# Alternative 2: pivot first, then spread the "shape" and "animal" columns
# into separate columns whose names are joined to the index with "_".
df %>%
  pivot_wider(id_cols = "ID", names_from = "col1", values_from = "col2") %>%
  unnest_wider("shape", names_sep = "_") %>%
  unnest_wider("animal", names_sep = "_")

forward fill all missing values for all variables

We can convert the column names to symbols with syms and evaluate (!!!)

# Turn every column name of d1 into a symbol and splice the symbols into
# fill(), so that all columns are filled downward.
tidyr::fill(
  d1,
  !!!rlang::syms(names(d1)),
  .direction = 'down'
)
# A tibble: 3 x 2
#   var1  var2
#  <dbl> <dbl>
#1    NA    NA
#2     1     3
#3     1     3

data

# Example data: two numeric columns with NAs in the first and last rows.
# tibble() is used because data_frame() is deprecated.
d1 <- tibble::tibble(var1 = c(NA, 1, NA), var2 = c(NA, 3, NA))

R: How to replace NA with most recent value by row

There are a series of non-base solutions:

# Carry the last observed value forward over NAs.
# na.rm = FALSE keeps leading NAs, so the result has the same length as the
# input and can be assigned back to the column.
zoo::na.locf(df$Value, na.rm = FALSE)
# nafill() defaults to type = "const" with fill = NA, which changes nothing,
# so last-observation-carried-forward has to be requested explicitly.
data.table::nafill(df$Value, type = "locf")

naniar is also a package designed entirely around NA handling.

R: Fill Down Multiple Columns