Select Multiple Columns in Data.Table by Their Numeric Indices

Select multiple columns in data.table by their numeric indices

For versions of data.table >= 1.9.8, the following all just work:

library(data.table)
# One-row example table with three numeric columns: a, b and c.
dt <- data.table(a = 1, b = 2, c = 3)

# select single column by index (the result is still a data.table)
dt[, 2]
#    b
# 1: 2

# select multiple columns by index
dt[, 2:3]
#    b c
# 1: 2 3

# select single column by name
dt[, "a"]
#    a
# 1: 1

# select multiple columns by name
dt[, c("a", "b")]
#    a b
# 1: 1 2

For versions of data.table < 1.9.8 (for which numerical column selection required the use of with = FALSE), see this previous version of this answer. See also NEWS on v1.9.8, POTENTIALLY BREAKING CHANGES, point 3.

Extract columns from data table by numeric indices stored in a vector

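We can prefix the object name with double dots (`..`) so that data.table looks up 'a' in the calling environment (here 'a' is a vector of column indices, e.g. `a <- 4:6`) instead of searching for a column named 'a':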

# Column positions to extract, stored in an ordinary R vector.
# 4:6 matches the col4-col6 output shown below.
a <- 4:6

# The `..` prefix makes data.table look up `a` in the calling scope
# instead of treating it as a column name.
dt[, ..a]
#    col4 col5 col6
# 1:    4    5    6
# 2:    5    6    7
# 3:    6    7    8
# 4:    7    8    9

Or another option is with = FALSE

# with = FALSE makes data.table treat `a` as a vector of column
# positions/names rather than as an expression evaluated inside dt.
dt[, a, with = FALSE]

data

# Example table: six integer columns of four rows each; colN starts at N.
dt <- data.table(
  col1 = 1:4,
  col2 = 2:5,
  col3 = 3:6,
  col4 = 4:7,
  col5 = 5:8,
  col6 = 6:9
)

best way to select columns from data.table by type

You can pass a logical/character vector to .SDcols.

For character columns, we can do

library(data.table)

# Names of every character column in dt, found with a per-column type check.
cols <- names(dt)[vapply(dt, is.character, logical(1L))]

# Lower-case those columns; `:=` updates dt by reference, and .SDcols
# restricts .SD to the selected columns.
dt[,
   (cols) := lapply(.SD, tolower),
   .SDcols = cols]

Transform multiple columns and return all columns using data.table in R

Use `:=` to assign the output of lapply back to the columns named in petal_cols; the table is updated by reference, so all other columns are kept.

library(data.table)

# Round every column listed in petal_cols to zero decimal places and write the
# result back into the same columns, leaving the other columns untouched.
# NOTE(review): assumes iris was already converted to a data.table and that
# petal_cols holds the target column names -- both are defined outside this
# snippet.
iris[, (petal_cols) := lapply(.SD, \(x) round(x, digits = 0)),
     .SDcols = petal_cols]

# Print the updated table.
iris

# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1: 5.1 3.5 1 0 setosa
# 2: 4.9 3.0 1 0 setosa
# 3: 4.7 3.2 1 0 setosa
# 4: 4.6 3.1 2 0 setosa
# 5: 5.0 3.6 1 0 setosa
# ---
#146: 6.7 3.0 5 2 virginica
#147: 6.3 2.5 5 2 virginica
#148: 6.5 3.0 5 2 virginica
#149: 6.2 3.4 5 2 virginica
#150: 5.9 3.0 5 2 virginica

data.table efficiently finding common pairs between 2 columns

The columns "stim1" and "stim2" define an undirected graph. Create the graph for feedback == 1, get its connected components and for each row of the data.frame, check if the values of "stim1" and "stim2" belong to the same component. In the end assign NaN to the rows where feedback is 1.

suppressPackageStartupMessages(library(igraph))

# Logical mask of the rows whose feedback equals 1.
inx <- df1$feedback == 1

# Undirected graph whose edges are the (stim1, stim2) pairs of those rows.
g <- graph_from_data_frame(
  d = df1[inx, c("stim1", "stim2")],
  directed = FALSE
)

plot(g)

Sample Image


# Connected-component id of every vertex of g; the vector is named by vertex.
g_comp <- components(g)$membership

# For each row, check whether stim1 and stim2 are both vertices of g and share
# a component id; rows where either value is not a vertex of g get 0.
df1$transitive_pair_2 <- apply(df1[c("stim1", "stim2")], 1, \(x) {
  i <- names(g_comp) == x[1]
  j <- names(g_comp) == x[2]
  # Scalar condition, so use the short-circuiting `&&` rather than the
  # elementwise `&`.
  if (any(i) && any(j)) {
    g_comp[i] == g_comp[j]
  } else {
    0L
  }
})

# Rows that were used to build the graph (feedback == 1) are set to NaN.
df1$transitive_pair_2[inx] <- NaN
df1
#> subject stim1 stim2 feedback transitive_pair transitive_pair_2
#> 1 1003 50 51 1 NaN NaN
#> 2 1003 48 50 1 NaN NaN
#> 3 1003 49 51 1 NaN NaN
#> 4 1003 47 49 1 NaN NaN
#> 5 1003 47 46 1 NaN NaN
#> 6 1003 46 48 1 NaN NaN
#> 10 1003 50 48 1 NaN NaN
#> 428 1003 48 51 0 1 1
#> 433 1003 46 50 0 1 1
#> 434 1003 50 49 0 1 1
#> 435 1003 54 59 0 0 0

Created on 2022-07-31 by the reprex package (v2.0.1)

Issue converting multiple column classes in R data.table

The value passed to `.SDcols` must be either a logical vector with one element per column of the table (length 4 here) or a vector of column names/positions, e.g. `.SDcols = c("Born_before_2016")` or `.SDcols = 1`. However, `factcols <- sapply(norw5[,..varls], is.numeric)` returns a logical vector of length 3, so it cannot be passed to `.SDcols` directly.
It can be fixed as follows:

# Candidate columns that should end up as character.
fact <- c("Born_before_2016", "gender", "payor")

# Which of the candidate columns are currently numeric (one flag per column).
# vapply() guarantees a logical vector even for unexpected input.
factcols <- vapply(norw5[, ..fact], is.numeric, logical(1L))

# Index `fact` itself instead of names(norw5)[1:3], so the selection does not
# depend on the candidates being the first three columns of the table.
cols <- fact[factcols]

# Convert the selected columns to character.
# Note: `:=` updates norw5 by reference, so norw5new and norw5 refer to the
# same modified table.
norw5new <- norw5[, (cols) := lapply(.SD, as.character), .SDcols = cols]
norw5new

# Born_before_2016 gender payor Age_in_day
# <char> <char> <char> <int>
#1: 1 2.Female 1:Private 0
#2: 1 1.Male 1:Private 0
#3: 1 2.Female 4:Other 0
#4: 1 1.Male 4:Other 4
#5: 1 1.Male 1:Private 5

Adding multiple columns from a list table

library(data.table)

# Reshape have2 into a single-row table whose column names are taken from
# its `variable` column.
have2_wide <- transpose(have2, make.names = "variable")

# Append those columns to have1; the single row is repeated for every row of
# have1, as the output below shows.
cbind(have1, have2_wide)

Output

   Column1 Column2 Column3 Apple Orange Pear
1: 100 200 159 0.25 2.68 0.11
2: 169 506 101 0.25 2.68 0.11
3: 100 200 636 0.25 2.68 0.11

Data

# Three-row table of integer columns that the new columns are added to.
have1 <- data.table(
  Column1 = c(100L, 169L, 100L),
  Column2 = c(200L, 506L, 200L),
  Column3 = c(159L, 101L, 636L)
)

# Long-format lookup: one row per variable name with its numeric value.
have2 <- data.table(
  variable = c("Apple", "Orange", "Pear"),
  value = c(0.25, 2.68, 0.11)
)


Related Topics



Leave a reply



Submit