Adding a New Column Based Upon Values in Another Column Using Dplyr

Create new column based on existing columns whose names are stored in another column (dplyr)

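A tidyverse option is to use rowwise() and extract the value from the current row with cur_data() (note: in dplyr >= 1.1.0, cur_data() is deprecated in favour of pick())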

library(dplyr)
# Look up, row by row, the column whose name is stored in v3.
# pick(everything()) is used instead of cur_data(), which is deprecated as of
# dplyr 1.1.0; inside rowwise() it yields the current row as a one-row tibble,
# so [[v3]] extracts the single value from the column named by v3.
df %>%
  rowwise() %>%
  mutate(v4 = pick(everything())[[v3]]) %>%
  ungroup()
# A tibble: 5 × 4
v1 v2 v3 v4
<int> <int> <chr> <int>
1 1 101 v1 1
2 2 102 v2 102
3 3 103 v1 3
4 4 104 v2 104
5 5 105 v1 5

A more compact approach is to call get() after rowwise()

# With one row per group, v3 is a single string, so get() resolves it to the
# value of the column carrying that name in the current row.
df %>%
  rowwise() %>%
  mutate(v4 = get(v3)) %>%
  ungroup()

Or in base R, use row/column indexing for faster execution

# Build a two-column (row, column) index matrix: each row number is paired
# with the position of the column named in v3, and all cells are pulled out
# in one matrix-indexed lookup.
df[["v4"]] <- as.data.frame(df[1:2])[
  cbind(seq_len(nrow(df)), match(df[["v3"]], names(df)))
]
df[["v4"]]
[1] 1 102 3 104 5

Add a new column based on change in values in other columns

We may use max.col

# Name of the first column that holds each row's maximum.
tmp <- names(DF)[max.col(DF, ties.method = "first")]
# Rows that are all 0 or all 1 contain no change, so mark them as missing.
is.na(tmp) <- rowSums(DF == 0) == ncol(DF) | rowSums(DF == 1) == ncol(DF)
DF$Switch <- tmp

-output

> DF
Col1 Col2 Col3 Switch
1 0 0 1 Col3
2 0 1 0 Col2
3 1 1 1 <NA>

R mutate new column based on range of values in other column

If I understand what you're trying to do, a base R solution could be:

# Bucket number: count of whole 250-unit intervals below `time`, shifted to
# start at 1.
df$new_column <- 1 + df$time %/% 250

The %/% operator is integer division (sort of the complement of the modulus operator) and tells you how many copies of 250 would fit into your number; we add 1 to get the value you want.

The tidyverse version:

# Same 250-wide bucketing as the base R version, expressed as a mutate() call.
df <- mutate(df, new_column = 1 + time %/% 250)

r, dplyr: how to transform values in one column based on value in another column using gsub

Unlike gsub, str_remove is vectorized over the pattern, so each element of x can be cleaned with its own pattern from y

library(stringr)
library(dplyr)
# Remove from each x the first match of the pattern held in the same row of y.
df <- mutate(df, x = str_remove(string = x, pattern = y))

-output

df
x y
1 bc a
2 ac b
3 abc d

If we want to use sub/gsub, we need rowwise, because their pattern argument is not vectorized

# sub() takes a single pattern, so process one row at a time and strip the
# first match of that row's y from its x.
df %>%
  rowwise() %>%
  mutate(x = sub(pattern = y, replacement = "", x = x)) %>%
  ungroup()

R create new column based on data range at a certain time point

Instead of nested if_else calls, we can use case_when, which accepts multiple conditions in one call. We then group_by 'Patient' and fill the NA elements of 'Value_status' with the previous non-NA value.

library(dplyr)
library(tidyr)
# Classify each patient's Time == 1 measurement, then carry that label down
# to the patient's later rows.
tb %>%
  mutate(
    Value_status = case_when(
      Time == 1 & Value >= 50 ~ "high",
      Time == 1 & Value < 50 ~ "low"
    )
  ) %>%
  group_by(Patient) %>%
  # Rows left unlabelled (NA) take the previous non-NA label in the group.
  fill(Value_status) %>%
  ungroup()

-output

# A tibble: 15 x 5
RowID Patient Time Value Value_status
<chr> <chr> <dbl> <dbl> <chr>
1 A1 001 1 NA <NA>
2 A2 001 2 10 <NA>
3 A3 001 3 23 <NA>
4 A4 002 1 100 high
5 A5 002 2 30 high
6 A6 035 1 10 low
7 A7 035 2 15 low
8 A8 035 3 NA low
9 A9 035 4 60 low
10 A10 035 5 56.7 low
11 A11 100 1 30 low
12 A12 100 2 51 low
13 A13 105 1 3 low
14 A14 105 2 13 low
15 A15 105 3 77 low

add new column based on two other columns with several conditions, character

I like case_when from dplyr for these types of complex conditionals.

# Example input: one row per person with "yes"/"no"/NA flags.
df <- tibble::tribble(
  ~job,  ~honorary,
  "yes", "yes",
  "yes", "no",
  "no",  "yes",
  "yes", "yes",
  "yes", NA,
  NA,    "no"
)

library(dplyr)

# case_when() uses the first matching condition, so the order below matters:
# "both" must be tested before the single-flag labels.
df_new <- mutate(
  df,
  result = case_when(
    job == "yes" & honorary == "yes" ~ "both",
    honorary == "yes" ~ "honorary",
    job == "yes" ~ "job",
    # No "yes" and at least one flag unknown: the result is unknown too.
    is.na(honorary) & is.na(job) ~ NA_character_,
    is.na(honorary) & job == "no" ~ NA_character_,
    is.na(job) & honorary == "no" ~ NA_character_,
    TRUE ~ "other"
  )
)

df_new
#> # A tibble: 6 × 3
#> job honorary result
#> <chr> <chr> <chr>
#> 1 yes yes both
#> 2 yes no job
#> 3 no yes honorary
#> 4 yes yes both
#> 5 yes <NA> job
#> 6 <NA> no <NA>

or in base R


df_new <- df

# Base R equivalent of the case_when() labelling. Assignments run top to
# bottom, so later (more specific) labels overwrite earlier ones.
df_new <- within(df_new, {
  # Start from the fallback label so rows with no "yes" (e.g. "no"/"no") get
  # "other", matching the TRUE ~ "other" branch of case_when(); a full-length
  # character vector also keeps this working for zero-row input.
  result <- rep("other", length(job))
  # No "yes" and at least one flag missing: the result is unknown.
  result[is.na(job) & is.na(honorary)] <- NA
  result[is.na(honorary) & job == "no"] <- NA
  result[is.na(job) & honorary == "no"] <- NA
  # An NA in a logical index selects nothing on assignment, so rows with a
  # missing flag are only touched by the conditions that are definitely TRUE.
  result[honorary == "yes"] <- "honorary"
  result[job == "yes"] <- "job"
  result[job == "yes" & honorary == "yes"] <- "both"
})

Created on 2022-01-16 by the reprex package (v2.0.1)

R - Create new column based on substring from another column with conditions

There is probably a more efficient way to do this, but we could do a series of ifelse-style statements using case_when from tidyverse. First, I strip a trailing ;s__ (an empty species field) from any value that ends with it. Then, in the series of statements, I check whether a given taxonomic level is present and, if so, return it in the desired format. That check is repeated across all taxonomic levels, from the most specific to the least specific.

library(tidyverse)

# Derive taxon_main: the label of the most specific taxonomic rank present in
# each taxon string.
output <- input_data %>%
# Trim ";s__" from the ends of the string so an empty species field is not
# detected as a species by the conditions below.
mutate(taxon = trimws(taxon, whitespace = ";s__")) %>%
# Species present: take everything after "g__", delete ";s_" so genus and
# species end up joined by the remaining "_", then trim stray underscores.
mutate(taxon_main = case_when(str_detect(taxon, "s__") ~ trimws(str_replace_all(str_extract(taxon, "(?<=g__).*"), ";s_", ""), whitespace = '_'),
# Otherwise fall back one rank at a time (g, f, o, c, p, k prefixes): keep the
# text from that rank's prefix to the end of the string and collapse "__" to "_".
!str_detect(taxon, "s__") & str_detect(taxon, "g__")~ str_replace_all(str_extract(taxon, "g__.*"), "__", "_"),
!str_detect(taxon, "g__") & str_detect(taxon, "f__") ~ str_replace_all(str_extract(taxon, "f__.*"), "__", "_"),
!str_detect(taxon, "f__") & str_detect(taxon, "o__")~ str_replace_all(str_extract(taxon, "o__.*"), "__", "_"),
!str_detect(taxon, "o__") & str_detect(taxon, "c__")~ str_replace_all(str_extract(taxon, "c__.*"), "__", "_"),
!str_detect(taxon, "c__") & str_detect(taxon, "p__")~ str_replace_all(str_extract(taxon, "p__.*"), "__", "_"),
!str_detect(taxon, "p__") & str_detect(taxon, "k__")~ str_replace_all(str_extract(taxon, "k__.*"), "__", "_"),
# No condition above matched: leave taxon_main missing.
TRUE ~ NA_character_))

Output

# Show only the derived label column.
select(output, taxon_main)

taxon_main
1 Lactobacillus_crispatus
2 g_Anaerococcus
3 f_Comamonadaceae
4 f_Lachnospiraceae
5 Bosea_massiliensis
6 Acinetobacter_baumannii
7 f_Methylophilaceae

Alternatively, you could use separate first, which makes the code less reliant on stringr. We clean up before calling separate (collapsing the double underscores to a single one and removing the empty s__), then go through the case_when conditions, bind the result back to the original taxon column, and drop every other column except taxon_main.

# Split the taxon string into one column per rank, then label each row with
# the most specific rank that is present.
input_data %>%
  mutate(
    # Trim an empty species field, drop the "s__" species prefix, and collapse
    # the remaining "__" after each rank letter to a single "_".
    taxon = trimws(taxon, whitespace = ";s__"),
    taxon = str_replace_all(taxon, ";s__", ";"),
    taxon = str_replace_all(taxon, "__", "_")
  ) %>%
  separate(
    taxon,
    sep = ";",
    into = c(
      "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"
    ),
    # Rows that stop before the species rank are expected; pad them with NA on
    # the right explicitly instead of relying on the default, which does the
    # same padding but emits a warning.
    fill = "right"
  ) %>%
  mutate(
    taxon_main = case_when(
      # Species known: combine the bare genus name with the species name.
      !is.na(Species) ~
        paste(str_extract(Genus, "(?<=g_).*"), Species, sep = "_"),
      # Otherwise use the lowest rank that is filled in.
      is.na(Species) & !is.na(Genus) ~ Genus,
      is.na(Genus) & !is.na(Family) ~ Family,
      is.na(Family) & !is.na(Order) ~ Order,
      is.na(Order) & !is.na(Class) ~ Class,
      is.na(Class) & !is.na(Phylum) ~ Phylum,
      is.na(Phylum) & !is.na(Kingdom) ~ Kingdom
    )
  ) %>%
  # separate() removed the taxon column, so re-attach the original data.
  bind_cols(input_data, .) %>%
  select(taxon_main, taxon)

Output

               taxon_main                                                                                                                     taxon
1 Lactobacillus_crispatus k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__crispatus
2 g_Anaerococcus k__Bacteria;p__Firmicutes;c__Tissierellia;o__Tissierellales;f__Peptoniphilaceae;g__Anaerococcus;s__
3 f_Comamonadaceae k__Bacteria;p__Proteobacteria;c__Betap__Proteobacteria;o__Burkholderiales;f__Comamonadaceae
4 f_Lachnospiraceae k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae
5 Bosea_massiliensis k__Bacteria;p__Proteobacteria;c__Alphap__Proteobacteria;o__Rhizobiales;f__Bradyrhizobiaceae;g__Bosea;s__massiliensis
6 Acinetobacter_baumannii k__Bacteria;p__Proteobacteria;c__Gammap__Proteobacteria;o__Pseudomonadales;f__Moraxellaceae;g__Acinetobacter;s__baumannii
7 f_Methylophilaceae k__Bacteria;p__Proteobacteria;c__Betap__Proteobacteria;o__Nitrosomonadales;f__Methylophilaceae

Add columns with values based on another column using mutate?

You can use case_when (as pointed out in the comments). I used substr for the condition, but you can use any string filter of your choice instead.

library(dplyr)

# Classify each id by its first character; ids starting with any other
# character match no condition and get NA.
mutate(
  df,
  type = case_when(
    substr(id, 1, 1) == "R" ~ "reactor",
    substr(id, 1, 1) == "P" ~ "patient",
    substr(id, 1, 1) == "M" ~ "mock"
  )
)
id owner type
1 R1234 personA reactor
2 R5678 personA reactor
3 PAT12 personB patient
4 PAT34 personB patient
5 MOCK1 personB mock
6 MOCK2 personB mock


Related Topics



Leave a reply



Submit