Converting Multiple Boolean Columns to Single Factor Column

Converting multiple boolean columns to single factor column

Assuming d is the data, the new column could be obtained with

# Label each row with the name of the indicator column that equals 1 (the
# first column is skipped). This relies on every row having exactly one 1;
# otherwise apply() returns a list and the indexing fails.
d$type <- colnames(d)[-1][apply(d[-1] == 1, MARGIN = 1, FUN = which)]
# Show the first column next to the new one (position 6 assumes d had five
# columns before `type` was added).
d[c(1, 6)]
#       A type
# 1   ex1   S1
# 2   ex2   S2
# 3   ex3   S3
# 4   ex4   S1
# 5   ex5   S4
# 6   ex6   S2
# 7   ex7   S1
# 8   ex8   S2
# 9   ex9   S3
# 10 ex10   S1

R: Convert factor column to multiple boolean columns

You could try:

 # Break each hyphen-separated Events string into its individual event names.
 lst <- strsplit(as.character(df$Events), split = "-")
 # Distinct event names in order of first appearance; one output column each.
 lvl <- unique(unlist(lst))
 # Tabulate every row's events against the full level set and stack the
 # per-row counts into one table alongside the date column.
 res <- data.frame(
   date = df$date,
   do.call(
     rbind,
     lapply(lst, function(events) table(factor(events, levels = lvl)))
   ),
   stringsAsFactors = FALSE
 )

 res
 #         date Rain Fog Snow Thunderstorm
 #1  2013-01-08    1   0    0            0
 #2  2013-01-09    0   1    0            0
 #3  2013-01-10    0   0    0            0
 #4  2013-01-11    1   1    0            0
 #5  2013-01-12    0   0    1            0
 #6  2013-01-13    1   0    1            0
 #7  2013-01-14    1   0    0            1
 #8  2013-01-15    0   0    0            1
 #9  2013-01-16    1   1    0            1
 #10 2013-01-17    0   1    0            1
 #11 2013-01-18    1   1    1            1

Or possibly, this could be faster than the above (contributed by @alexis_laz)

  # Same indicator table, built by testing each level for membership in the
  # row's events rather than tabulating; uses `lst` and `lvl` defined earlier.
  setNames(
    data.frame(
      df$date,
      do.call(rbind, lapply(lst, function(events) as.integer(lvl %in% events)))
    ),
    c("date", lvl)
  )

 # Alternative using data.table plus a helper sourced from a GitHub gist.
 library(devtools)
 library(data.table)
 # NOTE(review): this downloads and runs code from a gist; presumably it is
 # what defines cSplit() used below -- TODO confirm before running.
 source_gist("11380733")
 library(reshape2) # in case it is needed

 # Split Events on "-" (requesting the "long" layout) and cast the result to
 # one column per event, keyed by date.
 res1 <- dcast.data.table(cSplit(df, "Events", "-", "long"), date~Events)
 # Merge back onto the first column of df with all=TRUE, so dates that are
 # absent from res1 are retained with NA in the event columns.
 res2 <- merge(subset(df, select=1), res1, by="date", all=TRUE)
 res2 <- as.data.frame(res2)
 # Turn every non-date column into 0/1: non-NA becomes 1, NA becomes 0
 # (the +0 converts the logical matrix to numeric).
 res2[,-1]  <- (!is.na(res2[,-1]))+0
 # Reorder the columns for display (swaps the 2nd and 3rd).
 res2[,c(1,3,2,4,5)]
 #          date Rain Fog Snow Thunderstorm
  #1  2013-01-08    1   0    0            0
  #2  2013-01-09    0   1    0            0
  #3  2013-01-10    0   0    0            0
  #4  2013-01-11    1   1    0            0
  #5  2013-01-12    0   0    1            0
  #6  2013-01-13    1   0    1            0
  #7  2013-01-14    1   0    0            1
  #8  2013-01-15    0   0    0            1
  #9  2013-01-16    1   1    0            1
  #10 2013-01-17    0   1    0            1
  #11 2013-01-18    1   1    1            1

 library(qdap)
 # Count the listed terms in Events, grouped by date, then take the first
 # element of the result and drop its second column (presumably a word-count
 # column -- TODO confirm against the qdap termco() documentation).
 with(df, termco(Events, date, c("Rain", "Fog", "Snow", "Thunderstorm")))[[1]][,-2]
 #         date Rain Fog Snow Thunderstorm
 #1  2013-01-08    1   0    0            0
 #2  2013-01-09    0   1    0            0
 #3  2013-01-10    0   0    0            0
 #4  2013-01-11    1   1    0            0
 #5  2013-01-12    0   0    1            0
 #6  2013-01-13    1   0    1            0
 #7  2013-01-14    1   0    0            1
 #8  2013-01-15    0   0    0            1
 #9  2013-01-16    1   1    0            1
 #10 2013-01-17    0   1    0            1
 #11 2013-01-18    1   1    1            1

Convert Boolean indicator columns to a single factor column

Here's one way using tidyverse functions

library(tibble)
library(dplyr)
library(tidyr)
# Collapse the 0/1 indicator columns of `dat` into one comma-separated label
# per row, using "none" for rows in which no indicator is set.
# pivot_longer() replaces the superseded gather(); the reshaped rows come out
# in a different order, but the grouping below makes the result the same.
dat %>%
  rowid_to_column() %>% # keep data for each row together
  pivot_longer(-rowid, names_to = "col", values_to = "val") %>%
  mutate(rowid = factor(rowid)) %>% # factor levels remember every row id
  filter(val == 1) %>% # keep only the indicators that are set
  group_by(rowid) %>%
  summarize(desired = paste(col, collapse = ",")) %>% # collapse values
  complete(rowid, fill = list(desired = "none")) # add "none" for empty groups

#   rowid desired
#   <fct> <chr>  
# 1 1     none   
# 2 2     a      
# 3 3     a,b,c

The basic idea is to reshape the data so that we can run functions over groups rather than over the rows of a data.frame, which isn't as easy.

R: Collapse multiple boolean columns into single attribute column with new rows for each combination

in base R:

 # Stack the indicator columns into long (values, ind) form beside the id
 # column A, keep the rows whose value is 1, then drop the values column.
 subset(
   cbind(A = dat[, 1], stack(dat[-1])),
   subset = values == 1,
   select = -2
 )
      A ind
1   ex1  S1
4   ex4  S1
7   ex7  S1
10 ex10  S1
12  ex2  S2
14  ex4  S2
15  ex5  S2
16  ex6  S2
17  ex7  S2
18  ex8  S2
23  ex3  S3
27  ex7  S3
28  ex8  S3
29  ex9  S3
35  ex5  S4

In the tidyverse:

library(tidyverse)
# One row per (A, Type) pair that is flagged with 1.
# gather() is superseded by pivot_longer(), but it is kept here because it
# stacks column by column, which yields the Type-ordered rows shown below.
dat %>%
  gather(key = Type, value = j, -A) %>%
  filter(j == 1) %>%
  select(-j)
      A Type
1   ex1   S1
2   ex4   S1
3   ex7   S1
4  ex10   S1
5   ex2   S2
6   ex4   S2
7   ex5   S2
8   ex6   S2
9   ex7   S2
10  ex8   S2
11  ex3   S3
12  ex7   S3
13  ex8   S3
14  ex9   S3
15  ex5   S4

Combining values Boolean columns to one with Priority in R

# For every row, find the first column (in `col_order` priority) holding "Y"
# and report its name; rows without any "Y" get NA and are relabelled below.
tmp <- data.frame(
  ID = dat[, 1],
  Result = col_order[
    apply(dat[col_order], 1, function(row) match("Y", row))
  ],
  stringsAsFactors = FALSE
)
tmp$Result[is.na(tmp$Result)] <- "Not Present"
tmp
#  ID      Result
#1  1           A
#2  2           B
#3  3           C
#4  4           D
#5  5           E
#6  6           C
#7  7           B
#8  8 Not Present

How to transform multiple boolean columns to one column with column headers and NaN?

Try `idxmax` combined with `Series.where` and `DataFrame.any`:

# Column label of the first True in each row; rows with no True become NaN.
# `axis` is passed by keyword because DataFrame.any() accepts keyword-only
# arguments from pandas 2.0 on (a positional 1 raises TypeError there).
df.idxmax(axis=1).where(df.any(axis=1))
#or np.where(df.any(axis=1),df.idxmax(axis=1),np.nan)

0      B
1    NaN
2      A
3      C
4      B
dtype: object

Convert a factor column to multiple boolean columns

How about model.matrix?

# One 0/1 indicator column per level of x; "0 +" removes the intercept so
# that every level, including the first, gets its own column.
model.matrix(~ 0 + factor(x), data = DT)

   factor(x)1 factor(x)2 factor(x)3 factor(x)4 factor(x)5
1           1          0          0          0          0
2           0          1          0          0          0
3           0          0          1          0          0
4           0          0          0          1          0
5           0          0          0          0          1
6           1          0          0          0          0
7           0          1          0          0          0
8           0          0          1          0          0
9           0          0          0          1          0
10          0          0          0          0          1
attr(,"assign")
[1] 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$`factor(x)`
[1] "contr.treatment"

Apparently, you can put model.matrix into [.data.table to give the same results. Not sure if it would be faster:

# Same indicator matrix, evaluated inside the data.table so that `x` is
# looked up among the columns of DT.
DT[, model.matrix(~ 0 + factor(x))]

Python - Pandas - DataFrame - Explode single column into multiple boolean columns based on conditions

Use Series.str.get_dummies():

In [31]: df.join(df.Letter.str.get_dummies())
Out[31]:
   Position Letter  a  b  c
0         1      a  1  0  0
1         2      b  0  1  0
2         3      c  0  0  1
3         4      b  0  1  0
4         5      b  0  1  0

In [32]: df.join(df.Letter.str.get_dummies().astype(bool))
Out[32]:
   Position Letter      a      b      c
0         1      a   True  False  False
1         2      b  False   True  False
2         3      c  False  False   True
3         4      b  False   True  False
4         5      b  False   True  False

Combining boolean columns into 1 using r

We can use max.col to directly get the column index for each row and replace that with the column names of the dataset

# The indicator columns start at position 4. max.col() gives, per row, the
# index of the column holding the row maximum (the first one on ties), which
# is then translated back into that column's name.
Type <- names(df1)[seq(4, ncol(df1))][
  max.col(df1[seq(4, ncol(df1))], ties.method = "first")
]
# Keep the three measurement columns and append the collapsed label.
df2 <- cbind(df1[1:3], Type = Type)
df2
#  X0.501_0.499.1 X0.400_0.600 X0.400_0.600.1        Type
#1         -39.91       -63.62         -53.14  X1.Octanol
#2         -48.68       -73.45         -63.17  X1.Octanol
#3         -57.89       -84.45         -73.99  X1.Octanol
#4         -65.99       -92.61         -83.37  X1.Octanol
#5         -72.62      -101.14         -91.33  X1.Octanol
#6        -167.42      -263.80        -218.03 X1.Propanol

data

# Example input for the max.col() answer: three numeric measurement columns
# followed by five 0/1 indicator columns, six rows in total.
df1 <- structure(list(X0.501_0.499.1 = c(-39.91, -48.68, -57.89, -65.99, 
-72.62, -167.42), X0.400_0.600 = c(-63.62, -73.45, -84.45, -92.61, 
-101.14, -263.8), X0.400_0.600.1 = c(-53.14, -63.17, -73.99, 
-83.37, -91.33, -218.03), X1.Octanol = c(1L, 1L, 1L, 1L, 1L, 
0L), X1.Propanol = c(0L, 0L, 0L, 0L, 0L, 1L), X2.Butanol = c(0L, 
0L, 0L, 0L, 0L, 0L), X2.propanol = c(0L, 0L, 0L, 0L, 0L, 0L), 
    X1.isobutanol = c(0L, 0L, 0L, 0L, 0L, 0L)),
    class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6"))

Convert multiple columns to factor and give them numerical values

We can use mutate with across

# Recode every column whose name contains "growth" as an ordered factor,
# relabelling the classes NG < SG < LG < MG < HG. The result is still a
# factor with character labels, not a numeric column.
df <- df %>%
  mutate(
    across(
      contains("growth"),
      function(growth) {
        ordered(
          growth,
          levels = c("NG", "SG", "LG", "MG", "HG"),
          labels = c("0", "2.5", "12", "40", "100")
        )
      }
    )
  )

Or with lapply in base R

# Names of the columns to recode.
nm1 <- grep("growth", names(df), value = TRUE)
# lapply() forwards `levels` and `labels` to ordered() for each column.
df[nm1] <- lapply(
  df[nm1],
  ordered,
  levels = c("NG", "SG", "LG", "MG", "HG"),
  labels = c("0", "2.5", "12", "40", "100")
)

This can also be done with ftransform (ftransformv for multiple columns) from the collapse package:

library(collapse)
# Convert growth-class codes into an ordered factor whose labels are the
# corresponding values; codes outside the known classes become NA.
f1 <- function(x) {
  growth_levels <- c("NG", "SG", "LG", "MG", "HG")
  growth_labels <- c("0", "2.5", "12", "40", "100")
  ordered(x, levels = growth_levels, labels = growth_labels)
}

# Positions of the columns whose name contains "growth".
i1 <- which(grepl("growth", names(df)))
# Apply f1 to just those columns and return the transformed data.
ftransformv(df, vars = i1, FUN = f1)

-output

#   ABC_growth ZFG_growth
#1          40       <NA>
#2          40       <NA>
#3          40       <NA>
#4          40       <NA>
#5          40       <NA>
#6          12         12
#7          12         12
#8          12         12
#9          12         12
#10         12         12
#11          0        2.5
#12          0        2.5
#13          0        2.5
#14          0        2.5
#15          0        2.5

Converting Multiple Boolean Columns to Single Factor Column