Create new column based on existing columns whose names are stored in another column (dplyr)
A tidyverse
option would be rowwise
with extraction using cur_data() (deprecated since dplyr 1.1.0; pick(everything()) is its replacement)
library(dplyr)
# For every row, look up the column whose name is stored in `v3` and copy that
# row's value from it into the new column `v4`.
# `pick(everything())` yields the current row as a one-row tibble; it is the
# dplyr >= 1.1.0 replacement for `cur_data()`, which is deprecated and warns.
df %>%
  rowwise() %>%
  mutate(v4 = pick(everything())[[v3]]) %>%
  ungroup()
# A tibble: 5 × 4
v1 v2 v3 v4
<int> <int> <chr> <int>
1 1 101 v1 1
2 2 102 v2 102
3 3 103 v1 3
4 4 104 v2 104
5 5 105 v1 5
Or a compact approach would be get
after rowwise
# Compact variant: one row at a time, resolve the name held in `v3` with get()
# and store the value it finds in `v4`; the row-wise grouping is dropped after.
df %>%
  rowwise() %>%
  mutate(v4 = get(v3)) %>%
  ungroup()
Or in base R
, use row/column indexing for faster execution
# Base R variant: pair every row number with the position of the column named
# in `v3`, then use that two-column index matrix to pull one cell per row.
# NOTE(review): positions are matched against names(df) but applied to the
# first two columns only, so this assumes the columns named in `v3` are the
# first two columns of `df`.
df[["v4"]] <- as.matrix(as.data.frame(df[1:2]))[
  cbind(seq_len(nrow(df)), match(df[["v3"]], names(df)))
]
df[["v4"]]
[1] 1 102 3 104 5
Add a new column based on change in values in other columns
We may use max.col
# For each row, take the name of the column holding the row maximum; ties are
# resolved in favour of the leftmost column.
tmp <- colnames(DF)[max.col(DF, ties.method = "first")]
# Rows in which every value is 0, or every value is 1, get NA instead.
tmp[rowSums(DF == 0) == ncol(DF) | rowSums(DF == 1) == ncol(DF)] <- NA
DF$Switch <- tmp
-output
> DF
Col1 Col2 Col3 Switch
1 0 0 1 Col3
2 0 1 0 Col2
3 1 1 1 <NA>
R mutate new column based on range of values in other column
If I understand what you're trying to do, a base
R solution could be:
# Bucket `time` into consecutive 250-wide ranges, numbered from 1.
df$new_column <- 1 + df$time %/% 250
The %/%
operator is integer division (sort of the complement of the modulus operator) and tells you how many copies of 250 would fit into your number; we add 1 to get the value you want.
The tidyverse
version:
# Same 250-wide bucketing of `time`, written as a single mutate() call.
df <- mutate(df, new_column = 1 + time %/% 250)
r, dplyr: how to transform values in one column based on value in another column using gsub
str_remove
is vectorized for the pattern
instead of gsub
library(stringr)
library(dplyr)
# Remove the first match of each row's `y` from that row's `x`.
# str_remove() is vectorised over the pattern, so no rowwise() is needed.
# Note: `y` is interpreted as a regular expression; wrap it in fixed() if the
# values are meant to be matched literally.
df <- mutate(df, x = str_remove(x, pattern = y))
-output
df
x y
1 bc a
2 ac b
3 abc d
If we want to use sub/gsub
, then may need rowwise
# sub() takes a single pattern, so process one row at a time: delete the first
# match of that row's `y` from its `x`, then drop the row-wise grouping.
df %>%
  rowwise() %>%
  mutate(x = sub(pattern = y, replacement = "", x = x)) %>%
  ungroup()
R create new column based on data range at a certain time point
Instead of nesting if_else
calls, we could use case_when
, which accepts several conditions at once, then group_by
'Patient' and fill
the NA elements of 'Value_status'
with the previous non-NA values
library(dplyr)
library(tidyr)
# Label only the Time == 1 rows ("high" for Value >= 50, "low" below 50; all
# other rows stay NA), then carry each patient's label down to later rows.
tb %>%
  mutate(
    Value_status = case_when(
      Time == 1 & Value >= 50 ~ "high",
      Time == 1 & Value < 50 ~ "low"
    )
  ) %>%
  group_by(Patient) %>%
  fill(Value_status, .direction = "down") %>%
  ungroup()
-output
# A tibble: 15 x 5
RowID Patient Time Value Value_status
<chr> <chr> <dbl> <dbl> <chr>
1 A1 001 1 NA <NA>
2 A2 001 2 10 <NA>
3 A3 001 3 23 <NA>
4 A4 002 1 100 high
5 A5 002 2 30 high
6 A6 035 1 10 low
7 A7 035 2 15 low
8 A8 035 3 NA low
9 A9 035 4 60 low
10 A10 035 5 56.7 low
11 A11 100 1 30 low
12 A12 100 2 51 low
13 A13 105 1 3 low
14 A14 105 2 13 low
15 A15 105 3 77 low
add new column based on two other columns with several conditions, character
I like case_when
from dplyr
for these types of complex conditionals.
# Sample data: six job/honorary combinations, including missing values.
df <- tibble::tibble(
  job = c("yes", "yes", "no", "yes", "yes", NA),
  honorary = c("yes", "no", "yes", "yes", NA, "no")
)
library(dplyr)
# Classify each row from `job` and `honorary`. case_when() returns the value of
# the first condition that is TRUE, so the clauses run from most to least
# specific and "other" catches whatever is left.
df_new <- df %>%
  mutate(
    result = case_when(
      job == "yes" & honorary == "yes" ~ "both",
      honorary == "yes" ~ "honorary",
      job == "yes" ~ "job",
      # Missing information and no "yes" on the known side: leave as NA.
      is.na(honorary) & is.na(job) ~ NA_character_,
      is.na(honorary) & job == "no" ~ NA_character_,
      is.na(job) & honorary == "no" ~ NA_character_,
      TRUE ~ "other"
    )
  )
df_new
#> # A tibble: 6 × 3
#> job honorary result
#> <chr> <chr> <chr>
#> 1 yes yes both
#> 2 yes no job
#> 3 no yes honorary
#> 4 yes yes both
#> 5 yes <NA> job
#> 6 <NA> no <NA>
or in base R
# Base R equivalent of the case_when() classification.
# Later assignments overwrite earlier ones, so the rules are applied from the
# lowest priority to the highest (the reverse of the case_when() clause order).
# `%in%` never yields NA, which keeps the logical subscripts free of NAs.
df_new <- within(df, {
  # Start from the fallback value, sized to the data rather than relying on a
  # scalar being silently extended by subscripted assignment.
  result <- rep("other", length(job))
  # Missing information and no "yes" on the known side: leave as NA.
  result[is.na(job) & honorary %in% "no"] <- NA
  result[is.na(honorary) & job %in% "no"] <- NA
  result[is.na(honorary) & is.na(job)] <- NA
  result[job %in% "yes"] <- "job"
  result[honorary %in% "yes"] <- "honorary"
  result[job %in% "yes" & honorary %in% "yes"] <- "both"
})
Created on 2022-01-16 by the reprex package (v2.0.1)
R - Create new column based on substring from another column with conditions
There is probably a more efficient way to do this, but we could do a series of ifelse
statements using case_when
from tidyverse
. First, I strip any trailing ;s__
from the taxon strings. Then, in the series of statements, I check whether a given taxonomic level is present and, if so, return it in the desired format. That check is repeated across all taxonomic levels.
library(tidyverse)
# Derive `taxon_main` from the most specific rank present in `taxon`.
# A trailing ";s__" is stripped first. case_when() stops at the first matching
# clause, so testing the ranks from species up to kingdom is enough: reaching a
# clause already implies that every more specific rank was absent.
output <- input_data %>%
  mutate(
    taxon = trimws(taxon, whitespace = ";s__"),
    taxon_main = case_when(
      # Species present: join genus and species as "Genus_species".
      str_detect(taxon, "s__") ~ trimws(
        str_replace_all(str_extract(taxon, "(?<=g__).*"), ";s_", ""),
        whitespace = "_"
      ),
      # Otherwise keep the rank with its prefix, e.g. "g__X" -> "g_X".
      str_detect(taxon, "g__") ~
        str_replace_all(str_extract(taxon, "g__.*"), "__", "_"),
      str_detect(taxon, "f__") ~
        str_replace_all(str_extract(taxon, "f__.*"), "__", "_"),
      str_detect(taxon, "o__") ~
        str_replace_all(str_extract(taxon, "o__.*"), "__", "_"),
      str_detect(taxon, "c__") ~
        str_replace_all(str_extract(taxon, "c__.*"), "__", "_"),
      str_detect(taxon, "p__") ~
        str_replace_all(str_extract(taxon, "p__.*"), "__", "_"),
      str_detect(taxon, "k__") ~
        str_replace_all(str_extract(taxon, "k__.*"), "__", "_"),
      TRUE ~ NA_character_
    )
  )
Output
# Show only the derived column.
select(output, taxon_main)
taxon_main
1 Lactobacillus_crispatus
2 g_Anaerococcus
3 f_Comamonadaceae
4 f_Lachnospiraceae
5 Bosea_massiliensis
6 Acinetobacter_baumannii
7 f_Methylophilaceae
Or you could also use separate
first, which will make the code less reliant on using a lot of stringr
. We can clean up before using separate
, such as only having one underscore and remove extra s__
. Then, we can go through the ifelse
statements, and then we can bind back to the original taxon
column and drop all the other columns, except for taxon_main
.
# Alternative: split `taxon` into one column per rank, then keep the most
# specific rank that is present.
input_data %>%
  mutate(
    # Strip a trailing ";s__", drop the species prefix, and reduce the
    # remaining double underscores to single ones.
    taxon = trimws(taxon, whitespace = ";s__"),
    taxon = str_replace_all(taxon, ";s__", ";"),
    taxon = str_replace_all(taxon, "__", "_")
  ) %>%
  separate(
    taxon,
    sep = ";",
    into = c(
      "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"
    ),
    # Lineages with fewer than seven ranks are expected. Pad them with NA on
    # the right explicitly; the default does the same but warns for each one.
    fill = "right"
  ) %>%
  mutate(
    # case_when() stops at the first match, so each clause only needs to test
    # its own rank: the more specific ranks are already known to be NA.
    taxon_main = case_when(
      !is.na(Species) ~
        paste(str_extract(Genus, "(?<=g_).*"), Species, sep = "_"),
      !is.na(Genus) ~ Genus,
      !is.na(Family) ~ Family,
      !is.na(Order) ~ Order,
      !is.na(Class) ~ Class,
      !is.na(Phylum) ~ Phylum,
      !is.na(Kingdom) ~ Kingdom
    )
  ) %>%
  # separate() removed `taxon`, so bring the original column back alongside.
  bind_cols(input_data, .) %>%
  select(taxon_main, taxon)
Output
taxon_main taxon
1 Lactobacillus_crispatus k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__crispatus
2 g_Anaerococcus k__Bacteria;p__Firmicutes;c__Tissierellia;o__Tissierellales;f__Peptoniphilaceae;g__Anaerococcus;s__
3 f_Comamonadaceae k__Bacteria;p__Proteobacteria;c__Betap__Proteobacteria;o__Burkholderiales;f__Comamonadaceae
4 f_Lachnospiraceae k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae
5 Bosea_massiliensis k__Bacteria;p__Proteobacteria;c__Alphap__Proteobacteria;o__Rhizobiales;f__Bradyrhizobiaceae;g__Bosea;s__massiliensis
6 Acinetobacter_baumannii k__Bacteria;p__Proteobacteria;c__Gammap__Proteobacteria;o__Pseudomonadales;f__Moraxellaceae;g__Acinetobacter;s__baumannii
7 f_Methylophilaceae k__Bacteria;p__Proteobacteria;c__Betap__Proteobacteria;o__Nitrosomonadales;f__Methylophilaceae
Add columns with values based on another column using mutate?
You can use case_when
(as pointed out in the comments). I used substr
for the condition but you can use any string filter of your choice instead.
library(dplyr)
# Derive `type` from the first character of `id`; ids starting with any other
# character get NA because no clause matches them.
df %>%
  mutate(
    type = case_when(
      substr(id, start = 1, stop = 1) == "R" ~ "reactor",
      substr(id, start = 1, stop = 1) == "P" ~ "patient",
      substr(id, start = 1, stop = 1) == "M" ~ "mock"
    )
  )
id owner type
1 R1234 personA reactor
2 R5678 personA reactor
3 PAT12 personB patient
4 PAT34 personB patient
5 MOCK1 personB mock
6 MOCK2 personB mock
Related Topics
Counting the Number of Elements With the Values of X in a Vector
Gather Multiple Sets of Columns
Convert Data from Long Format to Wide Format With Multiple Measure Columns
Difference Between Require() and Library()
Linear Regression and Group by in R
Extract the Maximum Value Within Each Group in a Dataframe
Formatting Decimal Places in R
Group by Multiple Columns and Sum Other Multiple Columns
Select/Assign to Data.Table When Variable Names Are Stored in a Character Vector
Does Ifelse Really Calculate Both of Its Vectors Every Time? Is It Slow
How to Sort a Character Vector Where Elements Contain Letters and Numbers
How to Get Summary Statistics by Group
How to Remove All Duplicates So That None Are Left in a Data Frame
How to Save a Plot as Image on the Disk
How to Call an Object With the Character Variable of the Same Name