How to Add a Factor Column to Dataframe Based on a Conditional Statement from Another Column

How to add a factor column to dataframe based on a conditional statement from another column?

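You can use ifelse() like this (ifelse() returns a character vector, so wrap the result in factor() if you need an actual factor column):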

# Label each row by period relative to 1991 and store the label as a factor.
# ifelse() alone returns a character vector; factor() with explicit levels
# makes the column a real factor and fixes the level order (pre, then post).
# The test is strictly greater-than, so year == 1991 is labelled "pre-1991";
# rows with a missing year stay NA.
dataframe$periodframe <- factor(
  ifelse(dataframe$year > 1991, "post-1991", "pre-1991"),
  levels = c("pre-1991", "post-1991")
)

Create a factor variable based on conditions across multiple columns in R

Here's a tidy solution:

library(dplyr)
library(tidyr)
# Simulate a long table of 3 ids x 6 readings. id 1 draws Sys and Dia from
# one range, every other id from another.
dat <- expand.grid(id = 1:3, num = 1:6)
is_first <- dat$id == 1

# Start from a numeric NA column, then fill by logical mask. The number of
# draws is taken from the mask itself instead of being hard-coded.
dat$Sys <- NA_real_
dat$Sys[is_first] <- runif(sum(is_first), 10, 100)
dat$Sys[!is_first] <- runif(sum(!is_first), 110, 145)
dat$Dia <- NA_real_
dat$Dia[is_first] <- runif(sum(is_first), 91, 125)
dat$Dia[!is_first] <- runif(sum(!is_first), 70, 95)

# Reshape to one row per id with columns Sys1..Sys6 and Dia1..Dia6.
dat <- dat %>%
  pivot_wider(
    names_from = "num",
    values_from = c("Sys", "Dia"),
    names_sep = ""
  )

# Flag each id: CH is 1 when any Sys reading is >= 140 or any Dia reading is
# <= 90, and 0 otherwise. rowwise() makes any()/c_across() operate within a
# single row; the result stays rowwise, as in the printed output below.
# NOTE(review): the Dia test is `<= 90` as in the original answer -- confirm
# that this is the intended direction.
dat %>%
  rowwise() %>%
  mutate(
    CH = case_when(
      any(c_across(contains("Sys")) >= 140) |
        any(c_across(contains("Dia")) <= 90) ~ 1,
      TRUE ~ 0
    )
  )
#> # A tibble: 3 × 14
#> # Rowwise:
#> id Sys1 Sys2 Sys3 Sys4 Sys5 Sys6 Dia1 Dia2 Dia3 Dia4 Dia5 Dia6
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 71.3 78.2 47.4 53.6 67.0 47.5 107. 114. 106. 112. 104. 108.
#> 2 2 114. 113. 125. 142. 142. 116. 71.8 82.2 73.4 75.8 70.4 93.1
#> 3 3 144. 136. 118. 112. 133. 126. 77.6 88.2 85.6 91.6 75.9 77.9
#> # … with 1 more variable: CH <dbl>

Created on 2022-06-28 by the reprex package (v2.0.1)

Creating a factor column based on unknown numbers in another column

We could do this automatically in either of two ways: apply match() to the unique values of 'Region_ID' to get each value's index and paste that index onto the 'BNR' prefix, or convert 'Region_ID' to a factor with levels specified as unique(Region_ID) and coerce it to integer with as.integer().

# Identify all .csv files associated with the full water column Sv
# measurements, compile them into one data frame, and label each distinct
# Region_ID as "BNR1", "BNR2", ... in order of first appearance.
# Uses purrr (map_dfr), readr (read_csv), stringr (str_c) and dplyr (mutate).
# NOTE(review): the original comment said "Dai15", while the path and the
# pattern refer to Dai3C / Dai3D -- confirm which data set is intended.
list.files(
  path = 'Z:/fishproj/Cambodia Dai project/Analytic/Flux/River_Width/Dai3C',
  # The pattern is a regular expression, not a glob: "BNR" is matched
  # literally (the original "BNR*" made the R optional), the dot before the
  # extension is escaped, and "$" anchors it so only names ending in ".csv"
  # are picked up.
  pattern = "^Dai3D_ABC_10mbin_20211209_fullwatercolumn_evening_BNR.*\\.csv$",
  full.names = TRUE
) %>%
  map_dfr(read_csv) %>%
  mutate(BNR = str_c("BNR", match(Region_ID, unique(Region_ID))))

Create a new factor level (new row) based on data from other rows with conditional statements

Here is one tidyverse approach.

library(tidyverse)

# Add a synthetic Timepoint "X" per patient.
# 1. Spread the Timepoint values into columns (A_A, A_B, B_A, ...).
# 2. For each measure, build <measure>_X: the "B" value when it exceeds the
#    measure's cutoff, otherwise the "A" value.
# 3. Go back to long form, splitting "<measure>_<Timepoint>" at the "_",
#    then spread the measures into columns again.
df %>%
  pivot_wider(
    values_from = A:G,
    names_from = Timepoint
  ) %>%
  mutate(
    A_X = ifelse(test = A_B > 999, yes = A_B, no = A_A),
    B_X = ifelse(test = B_B > 986, yes = B_B, no = B_A),
    C_X = ifelse(test = C_B > 1000, yes = C_B, no = C_A),
    D_X = ifelse(test = D_B > 1030, yes = D_B, no = D_A),
    E_X = ifelse(test = E_B > 800, yes = E_B, no = E_A),
    G_X = ifelse(test = G_B > 950, yes = G_B, no = G_A)
  ) %>%
  pivot_longer(
    cols = A_A:G_X,
    names_to = c("cat", "Timepoint"),
    names_sep = "_"
  ) %>%
  pivot_wider(
    values_from = value,
    names_from = cat
  )

#> # A tibble: 24 x 8
#> PatientID Timepoint A B C D E G
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0002 A NA 999. 987. 962. 1006. NA
#> 2 0002 B 977. NA 962. 929. 1028. 959.
#> 3 0002 X NA NA 987. 962. 1028. 959.
#> 4 0005 A NA 999. 929. 978. 954. 925.
#> 5 0005 B 964. NA 978. 1006. 1006. 956.
#> 6 0005 X NA NA 929. 978. 1006. 956.
#> 7 0009 A NA 1021. 969. 926. 950. NA
#> 8 0009 B 952. 956. 943. 969. 993. 949.
#> 9 0009 X NA 1021. 969. 926. 993. NA
#> 10 0018 A NA 912. 936. 943. 934. 973.
#> 11 0018 B 951. 964. 965. 965. 949. 984.
#> 12 0018 X NA 912. 936. 943. 949. 984.
#> 13 0039 A 947. 988. 996. 996. 961. 944.
#> 14 0039 B 903. 902. 921. 967. 955. 961.
#> 15 0039 X 947. 988. 996. 996. 955. 961.
#> 16 0043 A 985. 959. 967. NA 961. 995.
#> 17 0043 B NA 1029. 914. 893. 998. 995.
#> 18 0043 X NA 1029. 967. NA 998. 995.
#> 19 0046 A 930. 987. 893. 922. 1009. 979.
#> 20 0046 B 1008. 1066. 922. 976. 995. 953.
#> 21 0046 X 1008. 1066. 893. 922. 995. 953.
#> 22 0048 A 1027. 958. 930. 930. 1000. NA
#> 23 0048 B 999. 918. 950. 950. 983. 957.
#> 24 0048 X 999. 958. 930. 930. 983. 957.

Created on 2021-07-29 by the reprex package (v0.3.0)

And here is an alternative using group_modify and add_row:

library(tidyverse)

# For each patient, append a row with Timepoint "X". Each measure takes the
# Timepoint "B" value when that value exceeds the measure's cutoff, and the
# Timepoint "A" value otherwise. The result stays grouped by PatientID.
df %>%
  group_by(PatientID) %>%
  group_modify(function(rows, key) {
    at_a <- rows[rows$Timepoint == "A", ]
    at_b <- rows[rows$Timepoint == "B", ]

    # B value when above the cutoff, else the A value.
    choose <- function(measure, cutoff) {
      ifelse(at_b[[measure]] > cutoff, at_b[[measure]], at_a[[measure]])
    }

    add_row(
      rows,
      Timepoint = "X",
      A = choose("A", 999),
      B = choose("B", 986),
      C = choose("C", 1000),
      D = choose("D", 1030),
      E = choose("E", 800),
      G = choose("G", 950)
    )
  })

#> # A tibble: 24 x 8
#> # Groups: PatientID [8]
#> PatientID Timepoint A B C D E G
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0002 A NA 999. 987. 962. 1006. NA
#> 2 0002 B 977. NA 962. 929. 1028. 959.
#> 3 0002 X NA NA 987. 962. 1028. 959.
#> 4 0005 A NA 999. 929. 978. 954. 925.
#> 5 0005 B 964. NA 978. 1006. 1006. 956.
#> 6 0005 X NA NA 929. 978. 1006. 956.
#> 7 0009 A NA 1021. 969. 926. 950. NA
#> 8 0009 B 952. 956. 943. 969. 993. 949.
#> 9 0009 X NA 1021. 969. 926. 993. NA
#> 10 0018 A NA 912. 936. 943. 934. 973.
#> # ... with 14 more rows

Created on 2021-07-29 by the reprex package (v0.3.0)

How do I create a new column based on multiple conditions from multiple columns?

We can use %in% to compare a column against several values at once, and & to check that all of the conditions are TRUE.

library(dplyr)
# Index into c("", "Yes") with the combined condition: FALSE + 1 selects ""
# and TRUE + 1 selects "Yes". kids and distance are converted with
# as.character()/as.numeric() before being compared.
df %>%
  mutate(
    get.flyer = c("", "Yes")[
      1 + (
        commute %in% c("walk", "bike", "subway", "ferry") &
          as.character(kids) == "Yes" &
          as.numeric(as.character(distance)) < 10
      )
    ]
  )

It is better to create the data.frame with stringsAsFactors=FALSE, because in R versions before 4.0.0 the default is TRUE (since R 4.0.0 the default is FALSE). If we check str(df), we find that all the columns are of factor class. Also, if there are missing values, NA can be used instead of "" to avoid converting a numeric column to another class.

If we rewrite the creation of 'df'

# Rebuild the data with distance as a plain numeric vector (NA for missing
# values) and with strings kept as character rather than factor.
distance <- c(1, 12, 5, 25, 7, 2, NA, 8, 19, 7, NA, 4, 16, 12, 7)
df1 <- data.frame(
  commute = commute,
  kids = kids,
  distance = distance,
  stringsAsFactors = FALSE
)

the above code can be simplified

# Same indexing trick as before, without the type conversions: the logical
# condition plus 1 selects "" (FALSE) or "Yes" (TRUE).
df1 %>%
  mutate(
    get.flyer = c("", "Yes")[
      1 + (
        commute %in% c("walk", "bike", "subway", "ferry") &
          kids == "Yes" &
          distance < 10
      )
    ]
  )

For better understanding, some people prefer ifelse

# ifelse() version: "Yes" where all three conditions hold, "" where any of
# them is FALSE.
df1 %>%
  mutate(
    get.flyer = ifelse(
      test = commute %in% c("walk", "bike", "subway", "ferry") &
        kids == "Yes" &
        distance < 10,
      yes = "Yes",
      no = ""
    )
  )

This can be also done easily with base R methods

# Base R version: evaluate the condition inside df1 with with(), then map it
# to "Yes" / "" and store the result as a new column.
df1$get.flyer <- with(df1, {
  eligible <- commute %in% c("walk", "bike", "subway", "ferry") &
    kids == "Yes" &
    distance < 10
  ifelse(eligible, "Yes", "")
})

create new column based on existing pattern R

We may use regex_left_join() from the fuzzyjoin package:

library(data.table)
library(fuzzyjoin)
# Left-join tableRules to the candidate dimensions, treating each DIMENSION
# value as a regular expression matched against object_name.
regex_left_join(
  x = tableRules,
  y = data.table(DIMENSION = listDimPoss),
  by = c("object_name" = "DIMENSION")
)
#> object_name DIMENSION
#> 1 instr_asset_row instr_asset
#> 2 functional_cat functional_cat
#> 3 ref_sector_second ref_sector

Change numeric values in one column based on factor levels in another column

Or you could use %in% for multiple match/replacement

 # Set Items to 0 for every row whose Store.Type is "A" or "C", then print
 # the updated data frame.
 df$Items <- replace(df$Items, df$Store.Type %in% c("A", "C"), 0)
 df
#Items Store.Type
#1 0 A
#2 4 B
#3 0 C
#4 6 D
#5 3 B
#6 7 E


Related Topics



Leave a reply



Submit