Scale by Group in Data.Table

scale by group in data.table

The output of the `scale` function is a one-column matrix, so convert it to a vector with `as.vector` before assigning it to a column.

# Standardise score1 and score2 within each session, modifying dt in place.
# scale() returns a one-column matrix, so as.vector() flattens each result
# before it is assigned. .SD is not restricted here, so this relies on
# score1 and score2 being the only non-grouping columns of dt.
dt[
  ,
  c("score1", "score2") := lapply(.SD, function(col) as.vector(scale(col))),
  by = "session"
]
dt
#    session     score1     score2
# 1:       1 -0.7433155 -0.6859943
# 2:       1 -1.0530303 -1.0289917
# 3:       1 -0.2787433 -0.3429970
# 4:       1  0.8052585  0.6859944
# 5:       1  1.2698307  1.3719886
# 6:       2 -0.7847341 -0.6824535
# 7:       2 -0.2942753 -0.3650335
# 8:       2 -0.9949307 -0.9205191
# 9:       2  0.7567078  0.4285175
#10:       2  1.3172322  1.5394886

To understand it better, try it on a simple vector

# Called on a plain vector, scale() still returns a matrix: the `[,1]` column
# header in the printed output below shows the one-column shape.
scale(1:10)
#        [,1]
# [1,] -1.4863011
# [2,] -1.1560120
# [3,] -0.8257228
# [4,] -0.4954337
# [5,] -0.1651446
# [6,]  0.1651446
# [7,]  0.4954337
# [8,]  0.8257228
# [9,]  1.1560120
#[10,]  1.4863011

scale values within group in R

You can apply the `scale` function by group.

This can be done in base R:

# ave() applies FUN to the values of y within each level of x and returns the
# results in the original row order.
df$y2 <- ave(df$y, df$x, FUN = scale)
df

#  x y        y2
#1 1 1 -0.707107
#2 1 3  0.707107
#3 2 4  0.707107
#4 2 3 -0.707107
#5 3 5  1.091089
#6 3 2 -0.872872
#7 3 3 -0.218218

dplyr

library(dplyr)
# scale() returns a one-column matrix; without as.vector() the new column is
# stored as a matrix column (printed as `y2[,1]`) instead of a plain numeric
# vector. ungroup() drops the grouping once the per-group scaling is done.
df %>%
  group_by(x) %>%
  mutate(y2 = as.vector(scale(y))) %>%
  ungroup()

and in data.table :

library(data.table)
# scale() returns a one-column matrix; as.vector() strips its dim and
# "scaled:*" attributes so a plain numeric vector is assigned to y2 for each
# group of x.
setDT(df)[, y2 := as.vector(scale(y)), by = x]

data

# Example data: x is the grouping variable (groups 1, 2 and 3) and y holds
# the values to be scaled within each group.
df <- data.frame(
  x = rep(c(1, 2, 3), times = c(2, 2, 3)),
  y = c(1, 3, 4, 3, 5, 2, 3)
)

Standardize by group using data.table

After grouping by 'gr' and 'grr', loop over the Subset of Data.table (.SD), scale it (the output of scale is a matrix, so we convert it to vector with as.vector) and assign (:=) the output to the new columns.

# Standardise the first two columns of DT within each (gr, grr) group and
# store the results in new "<name>.z" columns, modifying DT in place.
# .SDcols pins .SD to exactly those two columns, so the number of scaled
# results always matches the number of new column names even when DT has
# other non-grouping columns. as.vector() flattens the one-column matrix that
# scale() returns.
# NOTE(review): assumes the first two columns of DT are the numeric value
# columns (not gr/grr) -- confirm against the actual table.
DT[, paste0(names(DT)[1:2], ".z") := lapply(.SD,
                  function(x) as.vector(scale(x))),
   by = .(gr, grr), .SDcols = names(DT)[1:2]]

How to scale segments of a column in an R data frame?

Apply the same function (scale) by group.

In base R

# ave() applies FUN to the values of x within each level of y and returns the
# results in the original row order.
df$z <- ave(df$x, df$y, FUN = scale)
df

#    x y        z
#1   1 A -1.26491
#2   2 A -0.63246
#3   3 A  0.00000
#4   4 A  0.63246
#5   5 A  1.26491
#6  20 B -1.33242
#7  22 B -0.59219
#8  24 B  0.14805
#9  25 B  0.51816
#10 27 B  1.25840
#11 12 C -0.83028
#12 13 C -0.36901
#13 12 C -0.83028
#14 15 C  0.55352
#15 17 C  1.47605

Using dplyr

library(dplyr)
# scale() returns a one-column matrix; without as.vector() the new column is
# stored as a matrix column (printed as `z[,1]`) instead of a plain numeric
# vector. ungroup() drops the grouping once the per-group scaling is done.
df %>%
  group_by(y) %>%
  mutate(z = as.vector(scale(x))) %>%
  ungroup()

Or data.table

library(data.table)
# scale() returns a one-column matrix; as.vector() strips its dim and
# "scaled:*" attributes so a plain numeric vector is assigned to z for each
# group of y.
setDT(df)[, z := as.vector(scale(x)), by = y]

how to scale a matrix by group?

We can use data.table. Convert the 'data.frame' to a 'data.table' (`setDT(my.df)`); then, grouped by 'sex' and selecting the columns of interest in `.SDcols`, loop through the columns (`lapply(.SD, ...)`), scale each one, and convert the result to a vector. (The `scale` function outputs a matrix with some attributes, which will create problems if we don't convert it to a vector.)

library(data.table)
# Overwrite x, y and z with their within-sex standardised values, in place.
# .SDcols = x:z selects the column range from x through z; as.vector() drops
# the matrix shape and attributes that scale() adds.
setDT(my.df)[
  ,
  c("x", "y", "z") := lapply(.SD, function(col) as.vector(scale(col))),
  .SDcols = x:z,
  by = "sex"
]

Data.table: Apply function over groups with reference to set value in each group. Pass resulting columns into a function

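One option is to compute, within each 'geography' group, the difference between every 'statistic' and the 'statistic' of the row where 'variable' equals 'base_variable' (selected with a logical condition), and then drop the baseline rows from the result.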

# Within each geography, subtract the statistic of the row whose variable
# equals base_variable from every row's statistic; the chained filter then
# removes the baseline rows themselves.
# NOTE(review): base_variable is not defined in this snippet -- presumably a
# value supplied by the surrounding code; confirm.
results[
  ,
  list(
    variable = variable,
    diff = statistic - statistic[variable == base_variable]
  ),
  by = "geography"
][variable != base_variable]
# geography variable       diff
# 1:         1    bravo  0.8100971
# 2:         1  charlie -0.2091748
# 3:         1    delta  2.2217346
# 4:         2    bravo -1.1499762
# 5:         2  charlie  0.1579213
# 6:         2    delta  0.4088169
# 7:         3    bravo -0.8811697
# 8:         3  charlie  0.9359998
# 9:         3    delta -0.1859381
#10:         4    bravo -1.5934593
#11:         4  charlie  1.7461715
#12:         4    delta  0.5763070

Trying to use dplyr to group_by and apply scale()

The problem seems to be in the base scale() function, which expects a matrix. Try writing your own.

#' Standardise a numeric vector
#'
#' Centres `x` on its mean and divides by its standard deviation. Missing
#' values are ignored when computing the mean and standard deviation and stay
#' `NA` in the output.
#'
#' @param x A numeric vector.
#' @return A plain numeric vector the same length as `x` (not a matrix, unlike
#'   `scale()`).
scale_this <- function(x) {
  centre <- mean(x, na.rm = TRUE)
  spread <- sd(x, na.rm = TRUE)
  (x - centre) / spread
}

Then this works:

library("dplyr")

# reproducible sample data: n rows, each with a student letter drawn with
# replacement and three uniformly distributed scale scores
set.seed(123)
n <- 1000
df <- data.frame(
  stud_ID = sample(LETTERS, size = n, replace = TRUE),
  behavioral_scale = runif(n, min = 0, max = 10),
  cognitive_scale = runif(n, min = 1, max = 20),
  affective_scale = runif(n, min = 0, max = 1)
)
# Add a per-student standardised copy ("<name>_ind") of each of the three
# scale columns. The result is still grouped by stud_ID.
scaled_data <- df %>%
  group_by(stud_ID) %>%
  mutate(
    across(
      c(behavioral_scale, cognitive_scale, affective_scale),
      scale_this,
      .names = "{.col}_ind"
    )
  )

Or, if you're open to a data.table solution:

library("data.table")

# Convert df to a data.table by reference.
setDT(df)

# Columns to standardise within each student.
cols_to_scale <- c(
  "behavioral_scale",
  "cognitive_scale",
  "affective_scale"
)

# Returns a new table and leaves df's own columns untouched (there is no `:=`):
# the grouping key plus the scaled columns, ordered by key because of keyby.
df[
  ,
  lapply(.SD, scale_this),
  keyby = factor(stud_ID),
  .SDcols = cols_to_scale
]

Scale by Group in Data.Table