Adding a New Column Based Upon Values in Another Column Using Dplyr

Create new column based on existing columns whose names are stored in another column (dplyr)

A tidyverse option would be rowwise with extraction using cur_data()

library(dplyr)
# For each row, pull the value out of whichever column is named in v3.
# NOTE: cur_data() is deprecated as of dplyr 1.1.0; pick(everything()) is
# its replacement in newer versions.
df %>%
  rowwise() %>%
  mutate(v4 = cur_data()[[v3]]) %>%
  ungroup()
# A tibble: 5 × 4
     v1    v2 v3       v4
  <int> <int> <chr> <int>
1     1   101 v1        1
2     2   102 v2      102
3     3   103 v1        3
4     4   104 v2      104
5     5   105 v1        5

A more compact approach is to call get() after rowwise()

# Row by row, get() resolves the column name held in v3 to that row's value.
df %>%
  rowwise() %>%
  mutate(v4 = get(v3)) %>%
  ungroup()

Or in base R, use row/column indexing for faster execution

# Build a two-column (row, column) index matrix and use it to pick one cell
# per row from the first two columns.
# as.data.frame() presumably guards against df being a tibble -- confirm.
row_idx <- seq_len(nrow(df))
col_idx <- match(df$v3, names(df))
df$v4 <- as.data.frame(df[1:2])[cbind(row_idx, col_idx)]
df$v4
[1]   1 102   3 104   5

Add a new column based on change in values in other columns

We may use max.col

# Name of the column holding each row's maximum; ties go to the first column.
tmp <- names(DF)[max.col(DF, ties.method = "first")]
# Rows whose values are all 0 or all 1 get NA instead of a column name.
is.na(tmp) <- rowSums(DF == 0) == ncol(DF) | rowSums(DF == 1) == ncol(DF)
DF$Switch <- tmp

-output

> DF
  Col1 Col2 Col3 Switch
1    0    0    1   Col3
2    0    1    0   Col2
3    1    1    1   <NA>

R mutate new column based on range of values in other column

If I understand what you're trying to do, a base R solution could be:

# Integer-divide time by 250 to get a zero-based bin, then shift to one-based.
df$new_column <- 1 + df$time %/% 250

The %/% operator is integer division (sort of the complement of the modulus operator) and tells you how many copies of 250 would fit into your number; we add 1 to get the value you want.

The tidyverse version:

# Same one-based binning of time into steps of 250, written with dplyr.
df <- mutate(df, new_column = 1 + time %/% 250)

r, dplyr: how to transform values in one column based on value in another column using gsub

Unlike gsub, str_remove is vectorized over the pattern, so each row can use its own pattern

library(stringr)
library(dplyr)
# Remove the first match of each row's y pattern from that row's x.
df <- mutate(df, x = str_remove(string = x, pattern = y))

-output

df
    x y
1  bc a
2  ac b
3 abc d

If we want to use sub/gsub instead, we may need rowwise, because their pattern argument is not vectorized

# Apply sub() one row at a time so each row uses its own pattern from y.
df %>%
  rowwise() %>%
  mutate(x = sub(pattern = y, replacement = "", x = x)) %>%
  ungroup()

R create new column based on data range at a certain time point

Instead of nesting if_else calls, we could use case_when, which accepts multiple conditions, then group_by 'Patient' and fill the NA elements of 'Value_status' with the previous non-NA value

library(dplyr)
library(tidyr)
# Label each Time == 1 row as "low" or "high" (other rows start as NA),
# then carry that label down to the rows below it within the same patient.
tb %>%
  mutate(
    Value_status = case_when(
      Time == 1 & Value < 50 ~ "low",
      Time == 1 & Value >= 50 ~ "high"
    )
  ) %>%
  group_by(Patient) %>%
  fill(Value_status, .direction = "down") %>%
  ungroup()

-output

# A tibble: 15 x 5
   RowID Patient  Time Value Value_status
   <chr> <chr>   <dbl> <dbl> <chr>       
 1 A1    001         1  NA   <NA>        
 2 A2    001         2  10   <NA>        
 3 A3    001         3  23   <NA>        
 4 A4    002         1 100   high        
 5 A5    002         2  30   high        
 6 A6    035         1  10   low         
 7 A7    035         2  15   low         
 8 A8    035         3  NA   low         
 9 A9    035         4  60   low         
10 A10   035         5  56.7 low         
11 A11   100         1  30   low         
12 A12   100         2  51   low         
13 A13   105         1   3   low         
14 A14   105         2  13   low         
15 A15   105         3  77   low

add new column based on two other columns with several conditions, character

I like case_when from dplyr for these types of complex conditionals.

# Example data: six rows of job / honorary answers, including missing values.
df <- tibble::tibble(
  job      = c("yes", "yes", "no", "yes", "yes", NA),
  honorary = c("yes", "no", "yes", "yes", NA, "no")
)

library(dplyr)

# Conditions are evaluated top to bottom and the first TRUE one wins, so the
# most specific case ("both") comes first and the catch-all comes last.
df_new <- df %>%
  mutate(
    result = case_when(
      job == "yes" & honorary == "yes" ~ "both",
      honorary == "yes" ~ "honorary",
      job == "yes" ~ "job",
      # Partially or fully missing answers with no "yes" stay missing.
      is.na(honorary) & is.na(job) ~ NA_character_,
      is.na(honorary) & job == "no" ~ NA_character_,
      is.na(job) & honorary == "no" ~ NA_character_,
      TRUE ~ "other"
    )
  )

df_new
#> # A tibble: 6 × 3
#>   job   honorary result  
#>   <chr> <chr>    <chr>   
#> 1 yes   yes      both    
#> 2 yes   no       job     
#> 3 no    yes      honorary
#> 4 yes   yes      both    
#> 5 yes   <NA>     job     
#> 6 <NA>  no       <NA>

or in base R


# Work on a copy of df.
df_new <- df

# Later assignments overwrite earlier ones, so "both" is applied last;
# rows that match none of the conditions keep NA.
df_new <- within(df_new, {
  result <- NA
  result[honorary == "yes"] <- "honorary"
  result[job == "yes"] <- "job"
  result[job == "yes" & honorary == "yes"] <- "both"
})

Created on 2022-01-16 by the reprex package (v2.0.1)

R - Create new column based on substring from another column with conditions

There is probably a more efficient way to do this, but we could do a series of ifelse-style checks using case_when from tidyverse. First, I strip a trailing ;s__ (an empty species field) from any taxon string. Then, in the series of conditions, I check whether a given taxonomic level is present and, if so, return it in the desired format. That check is repeated across all taxonomic levels.

library(tidyverse)

# Derive taxon_main: the lowest taxonomic level present in each taxon string.
output <- input_data %>%
  # Strip a leading or trailing ";s__" (an empty species field).
  mutate(taxon = trimws(taxon, whitespace = ";s__")) %>%
  # Branches are tried in order, from species up to kingdom.
  # Species branch: keep everything after "g__", drop the ";s_" separator so
  # genus and species end up joined by the remaining "_", then trim any
  # leading/trailing underscores.
  # Every other branch: keep the text from the rank prefix to the end of the
  # string and collapse "__" to "_".
  mutate(taxon_main = case_when(str_detect(taxon, "s__") ~ trimws(str_replace_all(str_extract(taxon, "(?<=g__).*"), ";s_", ""), whitespace = '_'),
                                !str_detect(taxon, "s__") & str_detect(taxon, "g__")~ str_replace_all(str_extract(taxon, "g__.*"), "__", "_"),
                                !str_detect(taxon, "g__") & str_detect(taxon, "f__") ~ str_replace_all(str_extract(taxon, "f__.*"), "__", "_"),
                                !str_detect(taxon, "f__") & str_detect(taxon, "o__")~ str_replace_all(str_extract(taxon, "o__.*"), "__", "_"),
                                !str_detect(taxon, "o__") & str_detect(taxon, "c__")~ str_replace_all(str_extract(taxon, "c__.*"), "__", "_"),
                                !str_detect(taxon, "c__") & str_detect(taxon, "p__")~ str_replace_all(str_extract(taxon, "p__.*"), "__", "_"),
                                !str_detect(taxon, "p__") & str_detect(taxon, "k__")~ str_replace_all(str_extract(taxon, "k__.*"), "__", "_"),
                                # No recognised rank prefix at all.
                                TRUE ~ NA_character_))

Output

output %>% select(taxon_main)

               taxon_main
1 Lactobacillus_crispatus
2          g_Anaerococcus
3        f_Comamonadaceae
4       f_Lachnospiraceae
5      Bosea_massiliensis
6 Acinetobacter_baumannii
7      f_Methylophilaceae

Or you could also use separate first, which will make the code less reliant on using a lot of stringr. We can clean up before using separate, such as only having one underscore and remove extra s__. Then, we can go through the ifelse statements, and then we can bind back to the original taxon column and drop all the other columns, except for taxon_main.

# Alternative: split the lineage into one column per rank, then pick the
# lowest rank that is not NA.
input_data %>%
  # Clean up first: strip an empty trailing ";s__", drop the "s__" prefix from
  # species names, and collapse every remaining "__" to a single "_".
  mutate(taxon = trimws(taxon, whitespace = ";s__"),
         taxon = str_replace_all(taxon, ";s__", ";"),
         taxon = str_replace_all(taxon, "__", "_")) %>%
  # One column per rank; the case_when below treats a rank that is absent
  # from a lineage as NA.
  separate(taxon, sep = ";", into = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")) %>%
  # With a species present, join the genus (minus its "g_" prefix) and the
  # species with "_"; otherwise fall back to the lowest rank that is present.
  mutate(taxon_main = case_when(!is.na(Species) ~ paste(str_extract(Genus, "(?<=g_).*"), Species, sep = "_"),
                                is.na(Species) & !is.na(Genus) ~ Genus,
                                is.na(Genus) & !is.na(Family) ~ Family,
                                is.na(Family) & !is.na(Order) ~ Order,
                                is.na(Order) & !is.na(Class) ~ Class,
                                is.na(Class) & !is.na(Phylum) ~ Phylum,
                                is.na(Phylum) & !is.na(Kingdom) ~ Kingdom
                                )) %>% 
  # Put the original columns back in front (taxon is no longer present after
  # separate()), then keep only the derived label and the original taxon.
  bind_cols(input_data,.) %>% 
  select(taxon_main, taxon)

Output

               taxon_main                                                                                                                     taxon
1 Lactobacillus_crispatus                 k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__crispatus
2          g_Anaerococcus                       k__Bacteria;p__Firmicutes;c__Tissierellia;o__Tissierellales;f__Peptoniphilaceae;g__Anaerococcus;s__
3        f_Comamonadaceae                               k__Bacteria;p__Proteobacteria;c__Betap__Proteobacteria;o__Burkholderiales;f__Comamonadaceae
4       f_Lachnospiraceae                                               k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae
5      Bosea_massiliensis      k__Bacteria;p__Proteobacteria;c__Alphap__Proteobacteria;o__Rhizobiales;f__Bradyrhizobiaceae;g__Bosea;s__massiliensis
6 Acinetobacter_baumannii k__Bacteria;p__Proteobacteria;c__Gammap__Proteobacteria;o__Pseudomonadales;f__Moraxellaceae;g__Acinetobacter;s__baumannii
7      f_Methylophilaceae                            k__Bacteria;p__Proteobacteria;c__Betap__Proteobacteria;o__Nitrosomonadales;f__Methylophilaceae

Add columns with values based on another column using mutate?

You can use case_when (as pointed out in the comments). I used substr for the condition, but you can use any string filter of your choice instead.

library(dplyr)

# Derive type from the first character of id; ids with any other first
# character get NA.
df %>%
  mutate(
    type = case_when(
      substr(id, start = 1, stop = 1) == "R" ~ "reactor",
      substr(id, start = 1, stop = 1) == "P" ~ "patient",
      substr(id, start = 1, stop = 1) == "M" ~ "mock"
    )
  )
     id   owner    type
1 R1234 personA reactor
2 R5678 personA reactor
3 PAT12 personB patient
4 PAT34 personB patient
5 MOCK1 personB    mock
6 MOCK2 personB    mock