How to add a factor column to dataframe based on a conditional statement from another column?
You can use `ifelse()` like this:
# Label each row by period and store the labels as a factor: ifelse() on its
# own returns a character vector, whereas a factor column is wanted here.
# Years up to and including 1991 get "pre-1991"; an NA year stays NA.
dataframe$periodframe <- factor(
  ifelse(dataframe$year > 1991, "post-1991", "pre-1991"),
  levels = c("pre-1991", "post-1991")
)
Create a factor variable based on conditions across multiple columns in R
Here's a tidy solution:
library(dplyr)
library(tidyr)
# Toy long-format data: every combination of 3 ids and 6 reading numbers.
dat <- expand.grid(id = 1:3, num = 1:6)

# Logical mask for the first id. id has no NAs here, so plain logical
# indexing selects the same rows that which() would.
is_first <- dat$id == 1

# Sys: the 6 rows of id 1 are drawn from U(10, 100), the other 12 rows from
# U(110, 145). The draw order (Sys first, then Dia) is kept as-is.
dat$Sys <- NA_real_
dat$Sys[is_first] <- runif(6, 10, 100)
dat$Sys[!is_first] <- runif(12, 110, 145)

# Dia: the 6 rows of id 1 are drawn from U(91, 125), the other 12 rows from
# U(70, 95).
dat$Dia <- NA_real_
dat$Dia[is_first] <- runif(6, 91, 125)
dat$Dia[!is_first] <- runif(12, 70, 95)

# One row per id with columns Sys1..Sys6 and Dia1..Dia6.
dat <- dat %>%
  pivot_wider(
    names_from = num,
    values_from = c(Sys, Dia),
    names_sep = ""
  )

# Row by row: CH is 1 when any Sys reading is >= 140 or any Dia reading is
# <= 90, otherwise 0 (the fallback was previously written as `-0`).
dat %>%
  rowwise() %>%
  mutate(
    CH = case_when(
      any(c_across(starts_with("Sys")) >= 140) |
        any(c_across(starts_with("Dia")) <= 90) ~ 1,
      TRUE ~ 0
    )
  )
#> # A tibble: 3 × 14
#> # Rowwise:
#> id Sys1 Sys2 Sys3 Sys4 Sys5 Sys6 Dia1 Dia2 Dia3 Dia4 Dia5 Dia6
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 71.3 78.2 47.4 53.6 67.0 47.5 107. 114. 106. 112. 104. 108.
#> 2 2 114. 113. 125. 142. 142. 116. 71.8 82.2 73.4 75.8 70.4 93.1
#> 3 3 144. 136. 118. 112. 133. 126. 77.6 88.2 85.6 91.6 75.9 77.9
#> # … with 1 more variable: CH <dbl>
Created on 2022-06-28 by the reprex package (v2.0.1)
Creating a factor column based on unknown numbers in another column
We could do this automatically either with `match()` applied to the unique values of 'Region_ID' to return the index, which is then pasted onto the 'BNR' prefix, or by converting 'Region_ID' to a factor with `levels` specified as `unique(Region_ID)` and coercing it to integer with `as.integer()`.
# Find every matching .csv file in the directory, read and row-bind them
# into one data frame, then label each distinct Region_ID as "BNR1", "BNR2",
# ... in order of first appearance.
# NOTE(review): the original inline comment referred to "Dai15", while the
# directory is "Dai3C" and the file prefix is "Dai3D" - confirm which is meant.
list.files(
  path = 'Z:/fishproj/Cambodia Dai project/Analytic/Flux/River_Width/Dai3C',
  # The pattern previously ended in "BNR*.*csv": that made the final "R"
  # optional and left the dot unescaped and the end unanchored, so names
  # that merely contain "csv" could match. Require a literal ".csv" suffix.
  pattern = "^Dai3D_ABC_10mbin_20211209_fullwatercolumn_evening_BNR.*\\.csv$",
  full.names = TRUE
) %>%
  map_dfr(read_csv) %>%
  mutate(BNR = str_c("BNR", match(Region_ID, unique(Region_ID))))
Create a new factor level (new row) based on data from other rows with conditional statements
Here is one tidyverse approach.
library(tidyverse)
df %>%
  # Widen by Timepoint so each measure gets <measure>_A and <measure>_B
  # columns.
  pivot_wider(values_from = A:G, names_from = Timepoint) %>%
  # Derived "X" value per measure: take the B value when it exceeds that
  # measure's cutoff, otherwise the A value. An NA in B gives NA.
  mutate(
    A_X = ifelse(A_B > 999, A_B, A_A),
    B_X = ifelse(B_B > 986, B_B, B_A),
    C_X = ifelse(C_B > 1000, C_B, C_A),
    D_X = ifelse(D_B > 1030, D_B, D_A),
    E_X = ifelse(E_B > 800, E_B, E_A),
    G_X = ifelse(G_B > 950, G_B, G_A)
  ) %>%
  # Back to long form: split each column name at "_" into the measure
  # ("cat") and the Timepoint.
  pivot_longer(
    cols = A_A:G_X,
    names_to = c("cat", "Timepoint"),
    names_sep = "_"
  ) %>%
  # One column per measure again, now with A, B and X rows.
  pivot_wider(values_from = value, names_from = cat)
#> # A tibble: 24 x 8
#> PatientID Timepoint A B C D E G
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0002 A NA 999. 987. 962. 1006. NA
#> 2 0002 B 977. NA 962. 929. 1028. 959.
#> 3 0002 X NA NA 987. 962. 1028. 959.
#> 4 0005 A NA 999. 929. 978. 954. 925.
#> 5 0005 B 964. NA 978. 1006. 1006. 956.
#> 6 0005 X NA NA 929. 978. 1006. 956.
#> 7 0009 A NA 1021. 969. 926. 950. NA
#> 8 0009 B 952. 956. 943. 969. 993. 949.
#> 9 0009 X NA 1021. 969. 926. 993. NA
#> 10 0018 A NA 912. 936. 943. 934. 973.
#> 11 0018 B 951. 964. 965. 965. 949. 984.
#> 12 0018 X NA 912. 936. 943. 949. 984.
#> 13 0039 A 947. 988. 996. 996. 961. 944.
#> 14 0039 B 903. 902. 921. 967. 955. 961.
#> 15 0039 X 947. 988. 996. 996. 955. 961.
#> 16 0043 A 985. 959. 967. NA 961. 995.
#> 17 0043 B NA 1029. 914. 893. 998. 995.
#> 18 0043 X NA 1029. 967. NA 998. 995.
#> 19 0046 A 930. 987. 893. 922. 1009. 979.
#> 20 0046 B 1008. 1066. 922. 976. 995. 953.
#> 21 0046 X 1008. 1066. 893. 922. 995. 953.
#> 22 0048 A 1027. 958. 930. 930. 1000. NA
#> 23 0048 B 999. 918. 950. 950. 983. 957.
#> 24 0048 X 999. 958. 930. 930. 983. 957.
Created on 2021-07-29 by the reprex package (v0.3.0)
And here is an alternative using `group_modify()` and `add_row()`:
library(tidyverse)
df %>%
  group_by(PatientID) %>%
  # For each patient, append an "X" row built from that patient's A and B
  # rows. The result stays grouped by PatientID.
  group_modify(function(rows, key) {
    at_a <- rows[rows$Timepoint == "A", ]
    at_b <- rows[rows$Timepoint == "B", ]
    # Per measure: the B value if it exceeds the cutoff, otherwise the A value.
    add_row(
      rows,
      Timepoint = "X",
      A = ifelse(at_b$A > 999, at_b$A, at_a$A),
      B = ifelse(at_b$B > 986, at_b$B, at_a$B),
      C = ifelse(at_b$C > 1000, at_b$C, at_a$C),
      D = ifelse(at_b$D > 1030, at_b$D, at_a$D),
      E = ifelse(at_b$E > 800, at_b$E, at_a$E),
      G = ifelse(at_b$G > 950, at_b$G, at_a$G)
    )
  })
#> # A tibble: 24 x 8
#> # Groups: PatientID [8]
#> PatientID Timepoint A B C D E G
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0002 A NA 999. 987. 962. 1006. NA
#> 2 0002 B 977. NA 962. 929. 1028. 959.
#> 3 0002 X NA NA 987. 962. 1028. 959.
#> 4 0005 A NA 999. 929. 978. 954. 925.
#> 5 0005 B 964. NA 978. 1006. 1006. 956.
#> 6 0005 X NA NA 929. 978. 1006. 956.
#> 7 0009 A NA 1021. 969. 926. 950. NA
#> 8 0009 B 952. 956. 943. 969. 993. 949.
#> 9 0009 X NA 1021. 969. 926. 993. NA
#> 10 0018 A NA 912. 936. 943. 934. 973.
#> # ... with 14 more rows
Created on 2021-07-29 by the reprex package (v0.3.0)
How do I create a new column based on multiple conditions from multiple columns?
We can use `%in%` to compare a column against multiple elements, and `&` to check that all of the conditions are TRUE.
library(dplyr)
df %>%
  mutate(
    # The combined condition is logical; adding 1 turns FALSE/TRUE into the
    # positions 1/2, which pick "" or "Yes" from the lookup vector.
    # kids and distance are converted with as.character()/as.numeric()
    # before comparing.
    get.flyer = c("", "Yes")[
      (
        commute %in% c("walk", "bike", "subway", "ferry") &
          as.character(kids) == "Yes" &
          as.numeric(as.character(distance)) < 10
      ) + 1
    ]
  )
It is better to create the data.frame with `stringsAsFactors = FALSE`, because in R versions before 4.0.0 the default is `TRUE`. If we check `str(df)`, we can see that all the columns are of class factor. Also, if there are missing values, `NA` can be used instead of `""` to avoid converting a numeric column to some other class.
If we rewrite the creation of 'df':
# Numeric distance values, including two NAs.
distance <- c(1, 12, 5, 25, 7, 2, NA, 8, 19, 7, NA, 4, 16, 12, 7)

# Build the data frame without converting strings to factors.
# `commute` and `kids` must already exist in the session.
df1 <- data.frame(
  commute,
  kids,
  distance,
  stringsAsFactors = FALSE
)
the above code can be simplified
df1 %>%
  mutate(
    # Same lookup-by-position trick, with no type conversions: the columns
    # are compared directly.
    get.flyer = c("", "Yes")[
      (
        commute %in% c("walk", "bike", "subway", "ferry") &
          kids == "Yes" &
          distance < 10
      ) + 1
    ]
  )
For better understanding, some people prefer ifelse
df1 %>%
  mutate(
    # "Yes" when all three conditions hold, "" otherwise.
    get.flyer = ifelse(
      commute %in% c("walk", "bike", "subway", "ferry") &
        kids == "Yes" &
        distance < 10,
      "Yes",
      ""
    )
  )
This can also be done easily with base R methods:
# Base R version: evaluate the condition inside df1 and store the result as
# a new get.flyer column ("Yes" when all three conditions hold, "" otherwise).
df1$get.flyer <- with(
  df1,
  ifelse(
    commute %in% c("walk", "bike", "subway", "ferry") &
      kids == "Yes" &
      distance < 10,
    "Yes",
    ""
  )
)
create new column based on existing pattern R
We may use regex_left_join
library(data.table)
library(fuzzyjoin)
# Left-join tableRules to a one-column table of candidate dimensions,
# matching object_name against DIMENSION by regular expression.
regex_left_join(
  tableRules,
  data.table(DIMENSION = listDimPoss),
  by = c("object_name" = "DIMENSION")
)
object_name DIMENSION
1 instr_asset_row instr_asset
2 functional_cat functional_cat
3 ref_sector_second ref_sector
Change numeric values in one column based on factor levels in another column
Or you could use `%in%` for multiple match/replacement:
# Set Items to 0 for every row whose Store.Type is "A" or "C", then print
# the updated data frame.
df[df$Store.Type %in% c("A", "C"), "Items"] <- 0
df
#Items Store.Type
#1 0 A
#2 4 B
#3 0 C
#4 6 D
#5 3 B
#6 7 E
Related Topics
R Knitr Markdown: Output Plots Within for Loop
Add a Horizontal Line to Plot and Legend in Ggplot2
Replace Logical Values (True/False) with Numeric (1/0)
Plots Generated by 'Plot' and 'Ggplot' Side-By-Side
Collect All User Inputs Throughout the Shiny App
Add a Column with Count of Nas and Mean
In R, How to Add a Max by Group
Is There Anything Wrong with Using T & F Instead of True & False
Adding X and Y Axis Labels in Ggplot2
Create a Vector of All Days Between Two Dates
Adding a Company Logo to Shinydashboard Header
Object Not Found Error with Ddply Inside a Function
Stop an R Program Without Error
Add a Box for the Na Values to the Ggplot Legend for a Continuous Map
Converting a \U Escaped Unicode String to Ascii
Typeof Returns Integer for Something That Is Clearly a Factor
Scraping with Rvest - Complete with Nas When Tag Is Not Present