Converting Multiple Boolean Columns to Single Factor Column

Converting multiple boolean columns to single factor column

Assuming d is the data, the new column could be obtained with

# Label each row with the name of the indicator column (every column except
# the first) that holds a 1 in that row.
# NOTE(review): assumes exactly one indicator equals 1 per row -- confirm.
indicator_cols <- d[-1]
d$type <- names(indicator_cols)[apply(indicator_cols == 1, 1, which)]
# Show the first column alongside the sixth.
d[c(1, 6)]
# A type
# 1 ex1 S1
# 2 ex2 S2
# 3 ex3 S3
# 4 ex4 S1
# 5 ex5 S4
# 6 ex6 S2
# 7 ex7 S1
# 8 ex8 S2
# 9 ex9 S3
# 10 ex10 S1

R: Convert factor column to multiple boolean columns

You could try:

 # Split each Events string on "-" into its individual event names.
lst <- strsplit(as.character(df$Events), "-")
# Every distinct event, in order of first appearance; these become the columns.
lvl <- unique(unlist(lst))
# Tabulate one row's events against the full level set, so absent events give 0.
count_events <- function(x) table(factor(x, levels = lvl))
res <- data.frame(
  date = df$date,
  do.call(rbind, lapply(lst, count_events)),
  stringsAsFactors = FALSE
)

res
# date Rain Fog Snow Thunderstorm
#1 2013-01-08 1 0 0 0
#2 2013-01-09 0 1 0 0
#3 2013-01-10 0 0 0 0
#4 2013-01-11 1 1 0 0
#5 2013-01-12 0 0 1 0
#6 2013-01-13 1 0 1 0
#7 2013-01-14 1 0 0 1
#8 2013-01-15 0 0 0 1
#9 2013-01-16 1 1 0 1
#10 2013-01-17 0 1 0 1
#11 2013-01-18 1 1 1 1

Or possibly, this could be faster than the above (contributed by @alexis_laz)

  # Same indicator table without table(): test each level for membership in every row's events.
setNames(
  data.frame(
    df$date,
    do.call(rbind, lapply(lst, function(events) as.integer(lvl %in% events)))
  ),
  c("date", lvl)
)

Or

 # NOTE(review): source_gist() fetches and runs code from a GitHub gist at run time;
 # presumably gist 11380733 supplies cSplit(), which is not defined here -- confirm.
 library(devtools)
library(data.table)
source_gist("11380733")
library(reshape2) #In case it is needed

# Split Events on "-" into long form, then cast to one column per event, keyed by date.
res1 <- dcast.data.table(cSplit(df, "Events", "-", "long"), date~Events)
# Merge the first column of df back in with all = TRUE so unmatched dates keep a row.
res2 <- merge(subset(df, select=1), res1, by="date", all=TRUE)
res2 <- as.data.frame(res2)
# Turn every non-date cell into a 0/1 flag: non-NA becomes 1, NA becomes 0.
res2[,-1] <- (!is.na(res2[,-1]))+0
# Reorder the columns for display (swaps the second and third).
res2[,c(1,3,2,4,5)]
# date Rain Fog Snow Thunderstorm
#1 2013-01-08 1 0 0 0
#2 2013-01-09 0 1 0 0
#3 2013-01-10 0 0 0 0
#4 2013-01-11 1 1 0 0
#5 2013-01-12 0 0 1 0
#6 2013-01-13 1 0 1 0
#7 2013-01-14 1 0 0 1
#8 2013-01-15 0 0 0 1
#9 2013-01-16 1 1 0 1
#10 2013-01-17 0 1 0 1
#11 2013-01-18 1 1 1 1

Or

 # Presumably termco() tallies each listed term in Events per date -- confirm in the qdap docs.
 # [[1]] takes the first component of the result and [,-2] drops its second column.
 library(qdap)
with(df, termco(Events, date, c("Rain", "Fog", "Snow", "Thunderstorm")))[[1]][,-2]
# date Rain Fog Snow Thunderstorm
#1 2013-01-08 1 0 0 0
#2 2013-01-09 0 1 0 0
#3 2013-01-10 0 0 0 0
#4 2013-01-11 1 1 0 0
#5 2013-01-12 0 0 1 0
#6 2013-01-13 1 0 1 0
#7 2013-01-14 1 0 0 1
#8 2013-01-15 0 0 0 1
#9 2013-01-16 1 1 0 1
#10 2013-01-17 0 1 0 1
#11 2013-01-18 1 1 1 1

Convert Boolean indicator columns to a single factor column

Here's one way using tidyverse functions

library(tibble)
library(dplyr)
library(tidyr)
# For each row of dat, list the names of the columns whose value is 1,
# comma-separated, or "none" when no column is set.
# NOTE(review): assumes all columns of dat share one type (0/1 or logical);
# pivot_longer() refuses to combine incompatible column types -- confirm.
dat %>%
  rowid_to_column() %>% # keep data for each row together
  # pivot_longer() replaces the superseded gather(): one row per (rowid, column)
  pivot_longer(-rowid, names_to = "col", values_to = "val") %>%
  # a factor rowid lets complete() restore rows removed by the filter below
  mutate(rowid = factor(rowid)) %>%
  filter(val == 1) %>%
  group_by(rowid) %>%
  summarize(desired = paste(col, collapse = ",")) %>% # collapse values
  complete(rowid, fill = list(desired = "none")) # add "none" for empty groups

# rowid desired
# <fct> <chr>
# 1 1 none
# 2 2 a
# 3 3 a,b,c

The basic idea is to reshape the data so that we can run functions on groups, rather than running functions over the rows of a data.frame, which isn't as easy.

R: Collapse multiple boolean columns into single attribute column with new rows for each combination

in base R:

 # Stack the indicator columns into long form, keep rows whose value is 1, drop the value column.
subset(
  cbind(A = dat[, 1], stack(dat[-1])),
  subset = values == 1,
  select = -2
)
A ind
1 ex1 S1
4 ex4 S1
7 ex7 S1
10 ex10 S1
12 ex2 S2
14 ex4 S2
15 ex5 S2
16 ex6 S2
17 ex7 S2
18 ex8 S2
23 ex3 S3
27 ex7 S3
28 ex8 S3
29 ex9 S3
35 ex5 S4

In the tidyverse:

library(tidyverse)
# Reshape to one row per (A, Type) pair, keep the pairs flagged 1, then drop the flag.
dat %>%
  gather(key = Type, value = j, -A) %>%
  filter(j == 1) %>%
  select(-j)
A Type
1 ex1 S1
2 ex4 S1
3 ex7 S1
4 ex10 S1
5 ex2 S2
6 ex4 S2
7 ex5 S2
8 ex6 S2
9 ex7 S2
10 ex8 S2
11 ex3 S3
12 ex7 S3
13 ex8 S3
14 ex9 S3
15 ex5 S4

Combining values Boolean columns to one with Priority in R

# For every row, report the first column in col_order whose value is "Y".
tmp <- data.frame(
  ID = dat[, 1],
  Result = col_order[
    # match() gives the position of the first "Y" in the row, or NA if none
    apply(dat[col_order], 1, function(row) match("Y", row))
  ],
  stringsAsFactors = FALSE
)
# Rows without any "Y" are reported as "Not Present".
tmp$Result[is.na(tmp$Result)] <- "Not Present"
tmp
# ID Result
#1 1 A
#2 2 B
#3 3 C
#4 4 D
#5 5 E
#6 6 C
#7 7 B
#8 8 Not Present

How to transform multiple boolean columns to one column with column headers and NaN?

Try this, idxmax with series.where and df.any

# Label of the first maximal (True) column per row, kept only where the row has
# at least one True value; other rows become NaN.
# `axis` is passed by keyword because pandas 2.0 made it keyword-only for
# DataFrame.any, so the positional form df.any(1) raises a TypeError there.
df.idxmax(axis=1).where(df.any(axis=1))
# or: np.where(df.any(axis=1), df.idxmax(axis=1), np.nan)


0      B
1 NaN
2 A
3 C
4 B
dtype: object

Convert a factor column to multiple boolean columns

How about model.matrix?

# Build one 0/1 indicator column per level of factor(x); the "-1" in the
# formula removes the intercept so every level gets its own column.
model.matrix(~factor(x)-1,data=DT)

factor(x)1 factor(x)2 factor(x)3 factor(x)4 factor(x)5
1 1 0 0 0 0
2 0 1 0 0 0
3 0 0 1 0 0
4 0 0 0 1 0
5 0 0 0 0 1
6 1 0 0 0 0
7 0 1 0 0 0
8 0 0 1 0 0
9 0 0 0 1 0
10 0 0 0 0 1
attr(,"assign")
[1] 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$`factor(x)`
[1] "contr.treatment"

Apparently, you can put model.matrix into [.data.table to give the same results. Not sure if it would be faster:

# Same call evaluated in the j slot of the data.table, so x is looked up in DT.
DT[,model.matrix(~factor(x)-1)]

Python - Pandas - DataFrame - Explode single column into multiple boolean columns based on conditions

use Series.str.get_dummies():

In [31]: df.join(df.Letter.str.get_dummies())
Out[31]:
Position Letter a b c
0 1 a 1 0 0
1 2 b 0 1 0
2 3 c 0 0 1
3 4 b 0 1 0
4 5 b 0 1 0

or

In [32]: df.join(df.Letter.str.get_dummies().astype(bool))
Out[32]:
Position Letter a b c
0 1 a True False False
1 2 b False True False
2 3 c False False True
3 4 b False True False
4 5 b False True False

Combining boolean columns into 1 using r

We can use max.col to directly get the column index for each row and replace that with the column names of the dataset

# Columns 4 onward are treated as the indicator columns.
indicators <- df1[4:ncol(df1)]
# For each row, take the name of the first column holding the row maximum.
Type <- names(indicators)[max.col(indicators, ties.method = "first")]
# Keep the first three columns and append the derived label.
df2 <- cbind(df1[1:3], Type = Type)
df2
# X0.501_0.499.1 X0.400_0.600 X0.400_0.600.1 Type
#1 -39.91 -63.62 -53.14 X1.Octanol
#2 -48.68 -73.45 -63.17 X1.Octanol
#3 -57.89 -84.45 -73.99 X1.Octanol
#4 -65.99 -92.61 -83.37 X1.Octanol
#5 -72.62 -101.14 -91.33 X1.Octanol
#6 -167.42 -263.80 -218.03 X1.Propanol

data

# Reproducible example data: three numeric columns followed by five integer
# 0/1 indicator columns, six rows in total.
df1 <- structure(list(X0.501_0.499.1 = c(-39.91, -48.68, -57.89, -65.99, 
-72.62, -167.42), X0.400_0.600 = c(-63.62, -73.45, -84.45, -92.61,
-101.14, -263.8), X0.400_0.600.1 = c(-53.14, -63.17, -73.99,
-83.37, -91.33, -218.03), X1.Octanol = c(1L, 1L, 1L, 1L, 1L,
0L), X1.Propanol = c(0L, 0L, 0L, 0L, 0L, 1L), X2.Butanol = c(0L,
0L, 0L, 0L, 0L, 0L), X2.propanol = c(0L, 0L, 0L, 0L, 0L, 0L),
X1.isobutanol = c(0L, 0L, 0L, 0L, 0L, 0L)),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6"))

Convert multiple columns to factor and give them numerical values

We can use mutate with across

# Recode every column whose name contains "growth" as an ordered factor,
# relabelling the codes NG, SG, LG, MG, HG as 0, 2.5, 12, 40, 100.
df <- df %>%
  mutate(
    across(
      contains("growth"),
      function(x) {
        ordered(
          x,
          levels = c("NG", "SG", "LG", "MG", "HG"),
          labels = c("0", "2.5", "12", "40", "100")
        )
      }
    )
  )

Or with lapply in base R

# Names of the columns that contain "growth".
nm1 <- grep("growth", names(df), value = TRUE)
# Recode each of them as an ordered factor; the extra arguments are forwarded
# to factor() by lapply().
df[nm1] <- lapply(
  df[nm1],
  factor,
  levels = c("NG", "SG", "LG", "MG", "HG"),
  labels = c("0", "2.5", "12", "40", "100"),
  ordered = TRUE
)

Or this can be also done with ftransform (ftransformv - for multiple columns) from collapse

library(collapse)
# Recode growth codes as an ordered factor.
#
# x: vector of codes "NG", "SG", "LG", "MG", "HG"; any other value becomes NA.
# Returns an ordered factor with labels "0" < "2.5" < "12" < "40" < "100".
f1 <- function(x) {
  growth_codes <- c("NG", "SG", "LG", "MG", "HG")
  growth_labels <- c("0", "2.5", "12", "40", "100")
  factor(x, levels = growth_codes, labels = growth_labels, ordered = TRUE)
}

# Positions of the columns whose names contain "growth".
i1 <- grep('growth', names(df))
# Apply f1 to just those columns; the result is printed, not assigned back to df.
ftransformv(df, i1, f1)

-output

#   ABC_growth ZFG_growth
#1 40 <NA>
#2 40 <NA>
#3 40 <NA>
#4 40 <NA>
#5 40 <NA>
#6 12 12
#7 12 12
#8 12 12
#9 12 12
#10 12 12
#11 0 2.5
#12 0 2.5
#13 0 2.5
#14 0 2.5
#15 0 2.5


Related Topics



Leave a reply



Submit