R: Fill Down Multiple Columns

R: fill down multiple columns

We can use `fill_()` when the columns to fill are selected by name, i.e. passed as a character vector such as `names(df)`.

library(tidyr)# using tidyr_0.4.1.9000
# fill_() receives the columns to fill as a character vector, so names(df)
# selects every column of df.
# NOTE(review): fill_() is the older standard-evaluation variant and is
# deprecated in later tidyr releases -- confirm against the installed version.
res <- fill_(df, names(df))
head(res)
# col1 col2 col3
#1 1 NA b
#2 1 3 b
#3 2 4 a
#4 2 4 a
#5 2 1 a
#6 3 4 a

Another option would be:

# everything() is a selection helper that matches all columns of df.
fill(df, everything())

However, if we use `fill` with `names(df)`, it gives the same error that the OP showed:

# Deliberately failing call: with this tidyr version, fill() rejects an
# expression that evaluates to a column name (the error is shown below).
fill(df, names(df)[1])
#Error: All select() inputs must resolve to integer column positions.
#The following do not:
#* names(df)[1]

data

# Reproducible example data: 20 rows, each column drawn with replacement from
# a pool that contains NA, so there are gaps to fill.
set.seed(24)
df <- data.frame(
  col1 = sample(c(NA, 1:3), size = 20, replace = TRUE),
  col2 = sample(c(NA, 1:5), size = 20, replace = TRUE),
  col3 = sample(c(NA, letters[1:5]), size = 20, replace = TRUE),
  stringsAsFactors = FALSE
)

Fill multiple columns in a R dataframe

You can expand multiple columns with `complete()`. For example, if you need data up to week 8, you can do:

# isoweek = 1:8 makes weeks 1 to 8 appear for every group; week/group
# combinations that are absent from flu get n = 0 through the fill argument.
tidyr::complete(flu, isoweek = 1:8, group, fill = list(n = 0))

# A tibble: 16 x 3
# isoweek group n
# <dbl> <chr> <dbl>
# 1 1 fluA 5
# 2 1 fluB 6
# 3 2 fluA 3
# 4 2 fluB 5
# 5 3 fluA 12
# 6 3 fluB 14
# 7 4 fluA 6
# 8 4 fluB 0
# 9 5 fluA 23
#10 5 fluB 25
#11 6 fluA 0
#12 6 fluB 0
#13 7 fluA 0
#14 7 fluB 0
#15 8 fluA 0
#16 8 fluB 0

Fill out multiple columns in one dataset with data from another dataset

# Diagnoses per subject: one row per sub_id with up to three diagnosis codes.
# Empty strings ("") stand for a missing diagnosis.
df <- read.table(text = '  sub_id diag_1 diag_2 diag_3
1 1 S019 T028 S021
2 2 S520 "" ""
3 3 S320 S270 S324
4 4 S023 "" S109
5 5 S826 S420 S729
', header = TRUE)

# Lookup table: one row per diagnosis code with its ais and iss_br values.
# header = TRUE is spelled out because T is an ordinary, reassignable variable.
df_lookup <- read.table(header = TRUE, text = " diag_map ais iss_br
1 S019 1 6
2 S020 3 6
3 S021 2 1
4 S025 1 1
5 S109 1 1
6 S110 5 2
7 S270 0 0
8 S320 0 0
9 S420 4 4
10 S520 2 5
11 S729 2 3
12 T028 1 0")

library(tidyverse)
# For every diagnosis column, look the code up in df_lookup and add the
# matching ais and iss_br values as new columns. Codes without a match
# (including the empty string) yield NA.
# The columns are selected by name (diag_1:diag_3) rather than by position so
# the pipeline does not silently pick other columns if the layout changes.
# Two separate across() calls keep all *_ais columns ahead of the *_iss_br ones.
df %>%
  mutate(
    across(
      diag_1:diag_3,
      ~ df_lookup$ais[match(., df_lookup$diag_map)],
      .names = "{.col}_ais"
    ),
    across(
      diag_1:diag_3,
      ~ df_lookup$iss_br[match(., df_lookup$diag_map)],
      .names = "{.col}_iss_br"
    )
  )
#> sub_id diag_1 diag_2 diag_3 diag_1_ais diag_2_ais diag_3_ais diag_1_iss_br
#> 1 1 S019 T028 S021 1 1 2 6
#> 2 2 S520 2 NA NA 5
#> 3 3 S320 S270 S324 0 0 NA 0
#> 4 4 S023 S109 NA NA 1 NA
#> 5 5 S826 S420 S729 NA 4 2 NA
#> diag_2_iss_br diag_3_iss_br
#> 1 0 1
#> 2 NA NA
#> 3 0 NA
#> 4 NA 1
#> 5 4 3

Created on 2021-08-12 by the reprex package (v2.0.0)

Fill down constant value, add 1 when condition is met in dplyr

We can use cumsum with %in% and mutate.

library(dplyr)

# Running group number: start at 1 and move to the next number on every row
# whose state is "break" (the break row itself already gets the new number).
dat2 <- mutate(dat, n = 1 + cumsum(state %in% "break"))
dat2
# step state n
# 1 1 active 1
# 2 2 active 1
# 3 2 break 2
# 4 1 active 2
# 5 2 error 2
# 6 3 active 2
# 7 3 break 3
# 8 1 active 3

Data

# Example input: one row per observation with a step counter and a state label.
dat <- read.table(
  header = TRUE,
  text = "step   state   
1 active
2 active
2 break
1 active
2 error
3 active
3 break
1 active"
)

Fill both numerical and character columns, conditional on the previous and next value being equal

Using dplyr:

library(dplyr)
# Fill a missing cell in columns 2 to 4 only when the rows directly above and
# below agree on both areacode and type; the cell then takes the value from
# the row above. type is converted to character first.
dat2 |>
  mutate(type = as.character(type)) |>
  mutate(
    across(
      2:4,
      \(x) ifelse(
        is.na(x) & lag(areacode) == lead(areacode) & lag(type) == lead(type),
        lag(x),
        x
      )
    )
  )

zipcode areacode type region
1 1001 4 clay 3
2 1002 4 clay 3
3 1003 NA <NA> NA
4 1004 1 sand 3

# Same conditional fill applied to dat: a missing cell in columns 2 to 4 takes
# the previous row's value when the neighbouring rows share areacode and type.
dat |>
  mutate(type = as.character(type)) |>
  mutate(
    across(
      2:4,
      \(x) ifelse(
        is.na(x) & lag(areacode) == lead(areacode) & lag(type) == lead(type),
        lag(x),
        x
      )
    )
  )

zipcode areacode type region
1 1001 4 clay 3
2 1002 4 clay 3
3 1003 4 clay 3
4 1004 4 clay 3

Pivot wider in R with multiple columns

Try this solution.

Most of the work was creating a separate ID column and then creating unique names for the columns.

library(tidyr)
library(dplyr)
library(vctrs)

# Long key/value input: an "ID" row opens a record and the rows that follow
# (animal, shape, ...) belong to that ID.
df <- structure(
  list(
    col1 = c("ID", "animal", "animal", "animal", "shape", "ID", "animal", "shape"),
    col2 = c("55.", "dog", "bear", "rabbit", "circle", "67.", "cat", "square")
  ),
  class = "data.frame",
  row.names = c(NA, -8L)
)

# Create the ID column.
df$ID <- NA
# Find the rows that carry an ID.
idrows <- which(df$col1 == "ID")
# Copy each ID into the new column and carry it down over the rows below it.
df$ID[idrows] <- df$col2[idrows]
df <- fill(df, ID, .direction = "down")
# Drop the ID rows by keeping the complementary positive indices:
# `df[-idrows, ]` would return zero rows whenever `idrows` is empty.
df <- df[setdiff(seq_len(nrow(df)), idrows), ]

# Make the names unique within each ID, then pivot wider.
df %>%
  group_by(ID) %>%
  mutate(col1 = vec_as_names(col1, repair = "unique")) %>%
  # Strip only the exact "...1" suffix given to the first duplicate; the
  # anchored pattern leaves suffixes such as "...10" untouched.
  mutate(col1 = stringr::str_replace(col1, "\\.{3}1$", "")) %>%
  ungroup() %>%
  pivot_wider(id_cols = "ID", names_from = "col1", values_from = "col2")

ID animal animal...2 animal...3 shape
<chr> <chr> <chr> <chr> <chr>
1 55. dog bear rabbit circle
2 67. cat NA NA square

Other alternatives, based on one of your previous questions:

# Append a running number to every name within its ID (animal1, animal2, ...,
# shape1) so each ID/name pair is unique before widening.
df %>%
  group_by(ID) %>%
  mutate(col1 = paste0(col1, data.table::rowid(col1))) %>%
  ungroup() %>%
  pivot_wider(
    id_cols = "ID",
    names_from = "col1",
    values_from = "col2"
  )

or

# Widen first, then spread the "shape" and "animal" columns into numbered
# columns (shape_1, animal_1, animal_2, ...), one unnest step per line.
df %>%
  pivot_wider(
    id_cols = "ID",
    names_from = "col1",
    values_from = "col2"
  ) %>%
  unnest_wider("shape", names_sep = "_") %>%
  unnest_wider("animal", names_sep = "_")

forward fill all missing values for all variables

We can convert the column names to symbols with `syms` and splice them into the call with `!!!`:

# Turn every column name of d1 into a symbol and splice the symbols into
# fill() so that all columns are filled downwards.
tidyr::fill(d1, !!!rlang::syms(names(d1)), .direction = "down")
# A tibble: 3 x 2
# var1 var2
# <dbl> <dbl>
#1 NA NA
#2 1 3
#3 1 3

data

# Two numeric columns with a missing first and last value.
# tibble() replaces the deprecated data_frame() constructor.
d1 <- tibble::tibble(var1 = c(NA, 1, NA), var2 = c(NA, 3, NA))

R: How to replace NA with most recent value by row

There are several non-base solutions:

# Carry the last observation forward. na.rm = FALSE keeps leading NAs, so the
# result has the same length as df$Value and can be assigned back to the column.
zoo::na.locf(df$Value, na.rm = FALSE)
# nafill() defaults to type = "const" with fill = NA, which leaves every gap
# as NA; type = "locf" carries the most recent value forward instead.
data.table::nafill(df$Value, type = "locf")

naniar is another package designed entirely around handling NA values.



Related Topics



Leave a reply



Submit