Select Multiple Columns in Data.Table by Their Numeric Indices

Select multiple columns in data.table by their numeric indices

For versions of data.table >= 1.9.8, the following all just work:

library(data.table)
# Example table: a single row with three columns named a, b and c
dt <- data.table(a = 1, b = 2, c = 3)

# Select a single column by its position in j; the printed result below is
# still a table with one column, not a bare vector
dt[, 2]
#    b
# 1: 2

# Select several columns by position using an integer range
dt[, 2:3]
#    b c
# 1: 2 3

# Select a single column by giving its name as a string
dt[, "a"]
#    a
# 1: 1

# Select several columns by passing a character vector of names
dt[, c("a", "b")]
#    a b
# 1: 1 2

For versions of data.table < 1.9.8 (for which numerical column selection required the use of with = FALSE), see this previous version of this answer. See also NEWS on v1.9.8, POTENTIALLY BREAKING CHANGES, point 3.

Extract columns from data table by numeric indices stored in a vector

We can put double dots (`..`) in front of the variable `a` so that data.table takes the column indices from `a` instead of looking for a column named `a`:

# Select the columns whose positions are stored in the variable `a`; the `..`
# prefix marks `a` as a variable rather than a column name.
# NOTE(review): `a` is not defined in this snippet -- the output below matches
# a <- 4:6 applied to the `dt` from the data section; confirm.
dt[, ..a]
#   col4 col5 col6
#1:    4    5    6
#2:    5    6    7
#3:    6    7    8
#4:    7    8    9

Another option is to use `with = FALSE`:

# Alternative to `..a`: with = FALSE makes j use the value of `a` (the stored
# column indices) to pick the columns
dt[, a, with = FALSE]

data

# Sample table: six integer columns (col1 .. col6) with four rows each
dt <- data.table(
  col1 = 1:4,
  col2 = 2:5,
  col3 = 3:6,
  col4 = 4:7,
  col5 = 5:8,
  col6 = 6:9
)

best way to select columns from data.table by type

You can pass a logical/character vector to .SDcols.

For character columns, we can do

library(data.table)
# Names of the columns of dt that hold character data
cols <- names(dt)[vapply(dt, is.character, logical(1))]
# Lower-case exactly those columns in place; .SDcols restricts .SD to them
dt[
  ,
  (cols) := lapply(.SD, tolower),
  .SDcols = cols
]

Transform multiple columns and return all columns using data.table in R

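Assign the output of `lapply` back to the columns named in `petal_cols` by using `(petal_cols) := ...`, so that the remaining columns are returned unchanged.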

library(data.table)
# Round every column named in petal_cols to whole numbers and write the result
# back into those same columns; the other columns are left as they are.
# NOTE(review): petal_cols and a data.table version of iris are not defined in
# this snippet -- presumably they come from the original question; confirm.
iris[, (petal_cols) := lapply(.SD, function(v) round(v, digits = 0)),
     .SDcols = petal_cols]
# Print the updated table
iris

#     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
#  1:          5.1         3.5            1           0    setosa
#  2:          4.9         3.0            1           0    setosa
#  3:          4.7         3.2            1           0    setosa
#  4:          4.6         3.1            2           0    setosa
#  5:          5.0         3.6            1           0    setosa
# ---                                                            
#146:          6.7         3.0            5           2 virginica
#147:          6.3         2.5            5           2 virginica
#148:          6.5         3.0            5           2 virginica
#149:          6.2         3.4            5           2 virginica
#150:          5.9         3.0            5           2 virginica

data.table efficiently finding common pairs between 2 columns

The columns "stim1" and "stim2" define an undirected graph. Create the graph from the rows where feedback == 1, get its connected components and, for each row of the data.frame, check whether the values of "stim1" and "stim2" belong to the same component. Finally, assign NaN to the rows where feedback is 1.

suppressPackageStartupMessages(library(igraph))

# Logical mask of the rows whose feedback value is 1
inx <- df1[["feedback"]] == 1
# Undirected graph whose edges are the stim1/stim2 pairs of the masked rows
g <- graph_from_data_frame(
  d = df1[inx, c("stim1", "stim2")],
  directed = FALSE
)
# Draw the graph
plot(g)

Sample Image


# Component id of every vertex of g, as a vector named by vertex name
g_comp <- components(g)$membership

# Look up the component of each endpoint by vertex name. A stimulus that is not
# a vertex of g (it never appears in a feedback == 1 row) yields NA.
comp1 <- g_comp[as.character(df1$stim1)]
comp2 <- g_comp[as.character(df1$stim2)]

# 1 when both endpoints are vertices of g and share a component, otherwise 0.
# This vectorised lookup replaces the row-wise apply(): it avoids coercing the
# data.frame to a matrix, avoids rescanning all vertex names for every row, and
# always produces an integer result instead of a logical/integer mix.
df1$transitive_pair_2 <- as.integer(
  !is.na(comp1) & !is.na(comp2) & comp1 == comp2
)

# The rows that define the graph (feedback == 1) are not scored
df1$transitive_pair_2[inx] <- NaN
df1
#>     subject stim1 stim2 feedback transitive_pair transitive_pair_2
#> 1      1003    50    51        1             NaN               NaN
#> 2      1003    48    50        1             NaN               NaN
#> 3      1003    49    51        1             NaN               NaN
#> 4      1003    47    49        1             NaN               NaN
#> 5      1003    47    46        1             NaN               NaN
#> 6      1003    46    48        1             NaN               NaN
#> 10     1003    50    48        1             NaN               NaN
#> 428    1003    48    51        0               1                 1
#> 433    1003    46    50        0               1                 1
#> 434    1003    50    49        0               1                 1
#> 435    1003    54    59        0               0                 0

Created on 2022-07-31 by the reprex package (v2.0.1)

Issue converting multiple column classes in R data.table

In `.SDcols = factcols`, `factcols` must be either a logical vector with one entry per column of the table (length 4 here) or a vector of column names/positions, e.g. `.SDcols = c("Born_before_2016")` or `.SDcols = 1`. However, `factcols <- sapply(norw5[, ..varls], is.numeric)` returns a logical vector of length 3.
It can be fixed as follows:

# Candidate columns that should end up as character
fact <- c("Born_before_2016", "gender", "payor")
# One TRUE/FALSE per candidate: is that column currently numeric?
# vapply() guarantees a logical vector even for unusual input.
factcols <- vapply(norw5[, ..fact], is.numeric, logical(1))
# Index `fact` itself so the selection does not rely on the candidates being
# the first three columns of norw5, in that order
cols <- fact[factcols]
# := converts the selected columns by reference, so norw5 itself is modified;
# norw5new is another name for the same table rather than an independent copy
norw5new <- norw5[, (cols) := lapply(.SD, as.character), .SDcols = cols]
norw5new

#   Born_before_2016   gender     payor Age_in_day
#             <char>   <char>    <char>      <int>
#1:                1 2.Female 1:Private          0
#2:                1   1.Male 1:Private          0
#3:                1 2.Female   4:Other          0
#4:                1   1.Male   4:Other          4
#5:                1   1.Male 1:Private          5

Adding multiple columns from a list table

library(data.table)

# transpose() turns have2 into a single row whose column names are taken from
# its "variable" column; cbind() then attaches those columns to have1 (the
# output below shows that single row repeated for every row of have1)
cbind(have1, transpose(have2, make.names = "variable"))

Output

   Column1 Column2 Column3 Apple Orange Pear
1:     100     200     159  0.25   2.68 0.11
2:     169     506     101  0.25   2.68 0.11
3:     100     200     636  0.25   2.68 0.11

Data

# Example table that receives the new columns: three rows, integer columns
have1 <- data.table(
  Column1 = c(100L, 169L, 100L),
  Column2 = c(200L, 506L, 200L),
  Column3 = c(159L, 101L, 636L)
)

# Lookup table in long form: one row per new column (name in `variable`,
# value in `value`)
have2 <- data.table(
  variable = c("Apple", "Orange", "Pear"),
  value = c(0.25, 2.68, 0.11)
)

Select Multiple Columns in Data.Table by Their Numeric Indices