Converting multiple boolean columns to single factor column
Assuming `d` is the data, the new column could be obtained with:
# For every row, locate the column (among all columns except the first) whose
# value equals 1 and use that column's name as the row's type.
# NOTE(review): the indexing assumes exactly one column equals 1 in each row;
# confirm this holds for your data.
d$type <- names(d[-1])[apply(d[-1] == 1, 1, which)]
# Print columns 1 and 6 only (presumably the id column and the new `type`
# column, judging by the output shown below).
d[c(1, 6)]
# A type
# 1 ex1 S1
# 2 ex2 S2
# 3 ex3 S3
# 4 ex4 S1
# 5 ex5 S4
# 6 ex6 S2
# 7 ex7 S1
# 8 ex8 S2
# 9 ex9 S3
# 10 ex10 S1
R: Convert factor column to multiple boolean columns
You could try:
# Split each Events string on "-" into its individual event names.
lst <- strsplit(as.character(df$Events),"-")
# Distinct event names in order of first appearance; they become the columns.
lvl <- unique(unlist(lst))
# For each row, tabulate its events against the full set of levels (absent
# events count as 0), then row-bind the counts and place them next to `date`.
res <- data.frame(date=df$date,
do.call(rbind,lapply(lst, function(x) table(factor(x, levels=lvl)))),
stringsAsFactors=FALSE)
res
# date Rain Fog Snow Thunderstorm
#1 2013-01-08 1 0 0 0
#2 2013-01-09 0 1 0 0
#3 2013-01-10 0 0 0 0
#4 2013-01-11 1 1 0 0
#5 2013-01-12 0 0 1 0
#6 2013-01-13 1 0 1 0
#7 2013-01-14 1 0 0 1
#8 2013-01-15 0 0 0 1
#9 2013-01-16 1 1 0 1
#10 2013-01-17 0 1 0 1
#11 2013-01-18 1 1 1 1
Or possibly, this could be faster than the above (contributed by @alexis_laz)
setNames(data.frame(df$date, do.call(rbind,lapply(lst, function(x) as.integer(lvl %in% x)) )), c("date", lvl))
Or
library(devtools)
library(data.table)
source_gist("11380733")
library(reshape2) #In case it is needed
res1 <- dcast.data.table(cSplit(df, "Events", "-", "long"), date~Events)
res2 <- merge(subset(df, select=1), res1, by="date", all=TRUE)
res2 <- as.data.frame(res2)
res2[,-1] <- (!is.na(res2[,-1]))+0
res2[,c(1,3,2,4,5)]
# date Rain Fog Snow Thunderstorm
#1 2013-01-08 1 0 0 0
#2 2013-01-09 0 1 0 0
#3 2013-01-10 0 0 0 0
#4 2013-01-11 1 1 0 0
#5 2013-01-12 0 0 1 0
#6 2013-01-13 1 0 1 0
#7 2013-01-14 1 0 0 1
#8 2013-01-15 0 0 0 1
#9 2013-01-16 1 1 0 1
#10 2013-01-17 0 1 0 1
#11 2013-01-18 1 1 1 1
Or
library(qdap)
with(df, termco(Events, date, c("Rain", "Fog", "Snow", "Thunderstorm")))[[1]][,-2]
# date Rain Fog Snow Thunderstorm
#1 2013-01-08 1 0 0 0
#2 2013-01-09 0 1 0 0
#3 2013-01-10 0 0 0 0
#4 2013-01-11 1 1 0 0
#5 2013-01-12 0 0 1 0
#6 2013-01-13 1 0 1 0
#7 2013-01-14 1 0 0 1
#8 2013-01-15 0 0 0 1
#9 2013-01-16 1 1 0 1
#10 2013-01-17 0 1 0 1
#11 2013-01-18 1 1 1 1
Convert Boolean indicator columns to a single factor column
Here's one way using tidyverse functions
library(tibble)
library(dplyr)
library(tidyr)
# Collapse the indicator columns of `dat` into one comma-separated label per
# row; rows with no indicator set to 1 are labelled "none".
dat %>%
  rowid_to_column() %>% # keep data for each row together
  # pivot_longer() replaces the superseded gather(): one row per (rowid, col)
  pivot_longer(-rowid, names_to = "col", values_to = "val") %>%
  mutate(rowid = factor(rowid)) %>% # factor keeps every row id as a level
  filter(val == 1) %>%
  group_by(rowid) %>%
  summarize(desired = paste(col, collapse = ",")) %>% # collapse values
  complete(rowid, fill = list(desired = "none")) # add "none" for empty groups
# rowid desired
# <fct> <chr>
# 1 1 none
# 2 2 a
# 3 3 a,b,c
The basic idea is to reshape the data so that we can run functions over groups rather than over the rows of a data.frame, which isn't as easy.
R: Collapse multiple boolean columns into single attribute column with new rows for each combination
in base R:
subset(cbind(A=dat[,1],stack(dat[-1])),values==1,-2)
A ind
1 ex1 S1
4 ex4 S1
7 ex7 S1
10 ex10 S1
12 ex2 S2
14 ex4 S2
15 ex5 S2
16 ex6 S2
17 ex7 S2
18 ex8 S2
23 ex3 S3
27 ex7 S3
28 ex8 S3
29 ex9 S3
35 ex5 S4
In the tidyverse:
library(tidyverse)
dat%>%
gather(Type,j,-A)%>%
filter(j==1)%>%
select(-j)
A Type
1 ex1 S1
2 ex4 S1
3 ex7 S1
4 ex10 S1
5 ex2 S2
6 ex4 S2
7 ex5 S2
8 ex6 S2
9 ex7 S2
10 ex8 S2
11 ex3 S3
12 ex7 S3
13 ex8 S3
14 ex9 S3
15 ex5 S4
Combining values Boolean columns to one with Priority in R
# Build an ID/Result table: for every row, Result is the first column name in
# `col_order` (i.e. the highest-priority one) whose value is "Y".
tmp <- data.frame(
  ID = dat[, 1],
  # match() returns the position of the first "Y" in the row, or NA if none
  Result = col_order[apply(dat[col_order], 1, function(row) match("Y", row))],
  stringsAsFactors = FALSE
)
# Rows where no column held "Y" get an explicit placeholder label.
tmp$Result[is.na(tmp$Result)] <- "Not Present"
tmp
# ID Result
#1 1 A
#2 2 B
#3 3 C
#4 4 D
#5 5 E
#6 6 C
#7 7 B
#8 8 Not Present
How to transform multiple boolean columns to one column with column headers and NaN?
Try this: `idxmax` with `Series.where` and `DataFrame.any`:
# For a boolean frame: take, per row, the label of the first True column and
# blank it out (NaN) for rows that contain no True value at all.
# `axis` is passed by keyword because DataFrame.any() only accepts keyword
# arguments in pandas 2.x; the positional form df.any(1) raises a TypeError.
df.idxmax(axis=1).where(df.any(axis=1))
#or np.where(df.any(axis=1),df.idxmax(axis=1),np.nan)
0 B
1 NaN
2 A
3 C
4 B
dtype: object
Convert a factor column to multiple boolean columns
How about `model.matrix`?
model.matrix(~factor(x)-1,data=DT)
factor(x)1 factor(x)2 factor(x)3 factor(x)4 factor(x)5
1 1 0 0 0 0
2 0 1 0 0 0
3 0 0 1 0 0
4 0 0 0 1 0
5 0 0 0 0 1
6 1 0 0 0 0
7 0 1 0 0 0
8 0 0 1 0 0
9 0 0 0 1 0
10 0 0 0 0 1
attr(,"assign")
[1] 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$`factor(x)`
[1] "contr.treatment"
Apparently, you can put `model.matrix` into `[.data.table` to give the same results. Not sure if it would be faster:
DT[,model.matrix(~factor(x)-1)]
Python - Pandas - DataFrame - Explode single column into multiple boolean columns based on conditions
use Series.str.get_dummies():
In [31]: df.join(df.Letter.str.get_dummies())
Out[31]:
Position Letter a b c
0 1 a 1 0 0
1 2 b 0 1 0
2 3 c 0 0 1
3 4 b 0 1 0
4 5 b 0 1 0
or
In [32]: df.join(df.Letter.str.get_dummies().astype(bool))
Out[32]:
Position Letter a b c
0 1 a True False False
1 2 b False True False
2 3 c False False True
3 4 b False True False
4 5 b False True False
Combining boolean columns into 1 using r
We can use `max.col` to directly get the column index for each row, and then replace that index with the corresponding column name of the dataset:
# max.col() returns, for each row, the position of the largest value among
# columns 4 onward ('first' breaks ties in favour of the leftmost column);
# that position is then mapped back to the matching column name.
Type <- names(df1)[4:ncol(df1)][max.col(df1[4:ncol(df1)], 'first')]
# Keep the first three columns and append the derived Type column.
df2 <- cbind(df1[1:3], Type = Type)
df2
# X0.501_0.499.1 X0.400_0.600 X0.400_0.600.1 Type
#1 -39.91 -63.62 -53.14 X1.Octanol
#2 -48.68 -73.45 -63.17 X1.Octanol
#3 -57.89 -84.45 -73.99 X1.Octanol
#4 -65.99 -92.61 -83.37 X1.Octanol
#5 -72.62 -101.14 -91.33 X1.Octanol
#6 -167.42 -263.80 -218.03 X1.Propanol
data
df1 <- structure(list(X0.501_0.499.1 = c(-39.91, -48.68, -57.89, -65.99,
-72.62, -167.42), X0.400_0.600 = c(-63.62, -73.45, -84.45, -92.61,
-101.14, -263.8), X0.400_0.600.1 = c(-53.14, -63.17, -73.99,
-83.37, -91.33, -218.03), X1.Octanol = c(1L, 1L, 1L, 1L, 1L,
0L), X1.Propanol = c(0L, 0L, 0L, 0L, 0L, 1L), X2.Butanol = c(0L,
0L, 0L, 0L, 0L, 0L), X2.propanol = c(0L, 0L, 0L, 0L, 0L, 0L),
X1.isobutanol = c(0L, 0L, 0L, 0L, 0L, 0L)),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6"))
Convert multiple columns to factor and give them numerical values
We can use `mutate` with `across`:
df <- df %>%
mutate(across(contains('growth'), ~ ordered(.,
levels = c("NG", "SG", "LG", "MG", "HG"),
labels = c('0', '2.5', '12', '40', '100'))))
Or with `lapply` in base R:
nm1 <- grep('growth', names(df), value = TRUE)
df[nm1] <- lapply(df[nm1], function(x) ordered(x,
levels = c("NG", "SG", "LG", "MG", "HG"),
labels = c('0', '2.5', '12', '40', '100')))
Or this can also be done with `ftransform` (`ftransformv` for multiple columns) from the collapse package:
library(collapse)
# Recode growth categories as an ordered factor.
# x: vector of growth codes ("NG", "SG", "LG", "MG", "HG").
# Returns an ordered factor whose labels are '0', '2.5', '12', '40', '100'
# (in that order); any value outside the five codes becomes NA.
f1 <- function(x) {
  growth_codes <- c("NG", "SG", "LG", "MG", "HG")
  growth_labels <- c('0', '2.5', '12', '40', '100')
  factor(x, levels = growth_codes, labels = growth_labels, ordered = TRUE)
}
i1 <- grep('growth', names(df))
ftransformv(df, i1, f1)
-output
# ABC_growth ZFG_growth
#1 40 <NA>
#2 40 <NA>
#3 40 <NA>
#4 40 <NA>
#5 40 <NA>
#6 12 12
#7 12 12
#8 12 12
#9 12 12
#10 12 12
#11 0 2.5
#12 0 2.5
#13 0 2.5
#14 0 2.5
#15 0 2.5
Related Topics
Plot.Lm Error: $ Operator Is Invalid for Atomic Vectors
Non-Standard Evaluation and Quasiquotation in Dplyr() Not Working as (Naively) Expected
In Place Modification of Matrices in R
Ggplot2: Have Common Facet Bar in Outer Facet Panel in 3-Way Plot
R: How to Match/Join 2 Matrices of Different Dimensions (Nrow/Ncol)
How to Store Filter Expressions as Strings
Filtering Multiple Columns with Str_Detect
How to Use R to Create a Word Co-Occurrence Matrix
Subsetting Data Based on Dynamic Column Names
How to Read Column Names 'As Is' from CSV File
How to Log Transform the Y-Axis of R Geom_Histogram in the Right Direction
Scraping JavaScript Generated Data
R: Get Element by Name from a Nested List
Writing a Function to Calculate the Mean of Columns in a Dataframe in R
How to Automate Nested Sections in Rmds Which Include Text, Maps and Tables