Compute Mean and Standard Deviation by Group For Multiple Variables in a Data.Frame

How to calculate means and standard deviations for multiple grouped variables?

If there are only three sets of columns, you can use across with rowMeans (base R) or rowSds (from matrixStats)

library(dplyr)
library(matrixStats)
# Row-wise mean and standard deviation for each measurement family.
# The selectors match only the raw "<Family>P<n>" columns: mutate() evaluates
# its expressions in order, so a plain starts_with("AirTemp") in the SD line
# would also pick up the AirTempMean column created just before it and
# distort the standard deviation.
# na.rm = TRUE is passed to both statistics so they use the same values.
df %>%
    mutate(
        AirTempMean = rowMeans(across(matches("^AirTempP\\d+$")),
                               na.rm = TRUE),
        AirTempSD = rowSds(as.matrix(across(matches("^AirTempP\\d+$"))),
                           na.rm = TRUE),
        AirHumidityMean = rowMeans(across(matches("^AirHumidityP\\d+$")),
                                   na.rm = TRUE),
        AirHumiditySD = rowSds(as.matrix(across(matches("^AirHumidityP\\d+$"))),
                               na.rm = TRUE),
        PrecipitationMean = rowMeans(across(matches("^PrecipitationP\\d+$")),
                                     na.rm = TRUE),
        PrecipitationSD = rowSds(as.matrix(across(matches("^PrecipitationP\\d+$"))),
                                 na.rm = TRUE)
    )

If there are many sets of columns, an option is to reshape to 'long' format, compute the mean/sd for each original row, and then bind the result to the original dataset

library(tidyr)
# Row-wise mean/SD for every measurement family at once: reshape to long
# format (one row per original row and P-suffix), summarise within each
# original row, then attach the statistics to the untouched input.
bind_cols(
    df,
    df %>%
        select(-SiteID) %>%
        mutate(rn = row_number()) %>%
        pivot_longer(
            cols = -rn,
            names_to = c(".value", "pval"),
            names_pattern = "(.*)(P\\d+$)"
        ) %>%
        group_by(rn) %>%
        summarise(
            across(
                where(is.numeric),
                list(
                    Mean = ~ mean(.x, na.rm = TRUE),
                    SD = ~ sd(.x, na.rm = TRUE)
                )
            )
        ) %>%
        select(-rn)
)

-output

  SiteID AirTempP1 AirTempP2 AirTempP3 AirHumidityP1 AirHumidityP2 AirHumidityP3 PrecipitationP1 PrecipitationP2
1  KIIXB6808G  21.73691  24.96523  10.93523      12.84111      92.93506      82.79740        64.92664       70.736212
2  KIIXB6808G  29.42160  25.14421  16.28761      45.63732      56.82373      78.49595        92.42202       58.547199
3  KIIXB6808G  14.43153  17.56969  13.03869      33.29144      90.66400      43.75959        32.32953       96.171349
4  KIIXB6808G  24.35017  17.85779  11.20442      93.95450      18.58702      39.93221        26.69628       81.723180
5  KIIXB6808G  14.80084  29.38776  29.19315      94.70336      95.89065      25.89645        26.95639       28.048125
6  KIIXB6808G  27.88281  14.29717  10.24926      54.96979      83.53267      78.09418        76.69248       72.712109
7  KIIXB6808G  12.77562  22.11161  28.05708      33.02382      54.44677      20.95251        72.94213       93.959692
8  KIIXB6808G  14.85165  20.22299  10.78721      66.59833      31.77392      26.85253        95.13469       54.235009
9  KIIXB6808G  14.42898  27.83384  17.09562      53.95661      52.25697      71.31224        85.97124       23.399866
10 KIIXB6808G  12.87398  18.36380  20.59257      12.67498      53.06563      17.63772        50.60992        6.751882
   PrecipitationP3 AirTemp_Mean AirTemp_SD AirHumidity_Mean AirHumidity_SD Precipitation_Mean Precipitation_SD
1         88.71080     19.21246   7.347780         62.85786       43.61134           74.79122         12.39975
2         81.26882     23.61781   6.698725         60.31900       16.70584           77.41268         17.26350
3         83.31252     15.01330   2.320849         55.90501       30.55382           70.60447         33.76486
4         31.51854     17.80413   6.573037         50.82457       38.84645           46.64600         30.47327
5         60.96926     24.46059   8.366151         72.16349       40.07283           38.65792         19.32989
6         42.94284     17.47641   9.236680         72.19888       15.16659           64.11581         18.44402
7         19.50466     20.98143   7.703164         36.14103       16.96332           62.13549         38.38587
8         34.00365     15.28728   4.732951         41.74159       21.66675           61.12445         31.14241
9         52.38518     19.78615   7.095897         59.17527       10.54522           53.91876         31.31386
10        91.18074     17.27678   3.972451         27.79278       22.02714           49.51418         42.22509

mean and standard deviation by group for multiple variables

The function you will likely want to apply to your dataframe is aggregate() with either mean or sd as the function parameter.

Getting mean and standard deviation from groups in a data.frame

Assuming your data is in a data.frame called DF:

# Mean of HR within each level of Group.
by(data = DF$HR, INDICES = DF$Group, FUN = mean)

# DF$Group: 1AI
# [1] 276
# ------------------------------------------------------------------------------------------------------------------------------------------------------------- 
# DF$Group: 1AS
# [1] 246.7692
# ------------------------------------------------------------------------------------------------------------------------------------------------------------- 
# DF$Group: 1CI
# [1] 217.625
# ------------------------------------------------------------------------------------------------------------------------------------------------------------- 
# DF$Group: 1CS
# [1] 227.25

# Standard deviation of HR within each level of Group.
by(data = DF$HR, INDICES = DF$Group, FUN = sd)

# DF$Group: 1AI
# [1] 30.93946
# ------------------------------------------------------------------------------------------------------------------------------------------------------------- 
# DF$Group: 1AS
# [1] 36.48551
# ------------------------------------------------------------------------------------------------------------------------------------------------------------- 
# DF$Group: 1CI
# [1] 23.25595
# ------------------------------------------------------------------------------------------------------------------------------------------------------------- 
# DF$Group: 1CS
# [1] 25.77236

Mean and standard deviation with multiple dataframes

Use concat, remove the D rows with DataFrame.query, and aggregate with GroupBy.agg using named aggregations:

# Stack the three frames and discard the "D" rows before grouping.
df = pd.concat([df1, df2, df3]).query('ID != "D"')
# Named aggregations: mean and standard deviation of Amount per ID.
df = df.groupby('ID').agg(avg=('Amount', 'mean'), std=('Amount', 'std'))
print(df)
    avg       std
ID               
A     5  3.605551
B     1  1.000000
C     2  1.000000

Or remove D in the last step with DataFrame.drop:

# Same summary as the query() variant, but the "D" group is removed after
# aggregating. errors='ignore' keeps this working when no "D" rows exist,
# matching the query() variant, which is a no-op in that case.
df = (pd.concat([df1, df2, df3])
        .groupby('ID')
        .agg(avg=('Amount', 'mean'), std=('Amount', 'std'))
        .drop('D', errors='ignore'))

Calculate standard deviation for groups of values using Python

You can use groupby(['name']) on the full data frame first, and only apply the agg on the columns of interest:

# Sample frame: a group label plus two numeric columns.
data = pd.DataFrame(
    {
        'name': ['AAA', 'AAA', 'BBB', 'BBB', 'CCC', 'CCC', 'CCC'],
        'number': [10, 20, 1, 2, 5, 10, 10.5],
        'difference': [0, 10, 0, 1, 0, 5, 0.5],
    }
)
# Group the full frame first, then aggregate only the column of interest.
grouped = data.groupby(['name'])
grouped['difference'].agg(['mean', 'std'])

Mean and standard deviation NOT by group for multiple variables

Assuming that you want the mean and sd for columns v1:v4.
Using base R

 # Return c(mean, sd) of a numeric vector.
 #   x     : numeric vector.
 #   ...   : further arguments forwarded to mean() only (e.g. trim).
 #   na.rm : drop NAs before computing; applied to BOTH mean() and sd() so the
 #           two statistics describe the same observations. It sits after
 #           `...`, so it must be passed by name and existing positional
 #           calls keep their meaning.
 f1 <- function(x, ..., na.rm = FALSE) {
   c(mean(x, ..., na.rm = na.rm), sd(x, na.rm = na.rm))
 }
 # apply() over columns (MARGIN = 2) of everything except the first column.
 apply(X = df[, -1], MARGIN = 2, FUN = f1, na.rm = TRUE)
#           v1       v2       v3        v4
#[1,] 16.500000 23.00000 25.50000 12.000000
#[2,]  9.192388 15.55635 19.09188  2.828427

 # sapply() visits the same columns one at a time; f1 is called without na.rm.
 sapply(X = df[, -1], FUN = f1)
 #           v1       v2       v3        v4
 #[1,] 16.500000 23.00000 25.50000 12.000000
 #[2,]  9.192388 15.55635 19.09188  2.828427

 # aggregate() with a constant right-hand side treats all rows as one group;
 # na.action = NULL leaves NA handling to f1's na.rm argument.
 aggregate(. ~ 1, data = df[, -1], FUN = f1, na.rm = TRUE, na.action = NULL)
 #      v1.1      v1.2     v2.1     v2.2     v3.1     v3.2      v4.1      v4.2
 #1 16.500000  9.192388 23.00000 15.55635 25.50000 19.09188 12.000000  2.828427

 library(dplyr)
 # Mean and standard deviation of every column whose name starts with "v".
 # across() replaces the deprecated summarise_each()/funs() pair; results are
 # named <column>_<statistic> and ordered column by column.
 # na.rm = TRUE is given to both statistics so they use the same observations.
 summarise(df, across(starts_with("v"),
                      list(mean = ~ mean(.x, na.rm = TRUE),
                           sd = ~ sd(.x, na.rm = TRUE))))
 #  v1_mean    v1_sd v2_mean    v2_sd v3_mean    v3_sd v4_mean    v4_sd
 #1    16.5 9.192388      23 15.55635    25.5 19.09188      12 2.828427

Or using data.table

library(data.table)
# Mean (row 1) and standard deviation (row 2) of columns v1..v4.
# na.rm = TRUE is given to both functions so they use the same observations;
# previously only mean() dropped NAs, so a single NA produced a finite mean
# next to an NA standard deviation.
# Note: setDT() converts df to a data.table by reference.
setDT(df)[, lapply(.SD, function(x) c(mean(x, na.rm = TRUE),
                                      sd(x, na.rm = TRUE))),
          .SDcols = paste0('v', 1:4)]
#          v1       v2       v3        v4
#1: 16.500000 23.00000 25.50000 12.000000
#2:  9.192388 15.55635 19.09188  2.828427

data

# Example input: an integer id column plus four integer value columns (v1-v4),
# two rows with automatic row names.
df <- data.frame(
  id = 1:2,
  v1 = c(23L, 10L),
  v2 = c(34L, 12L),
  v3 = c(12L, 39L),
  v4 = c(10L, 14L)
)