Adding Grouped Mean Values to Column in Data Frame

Add a column with mean values for groups based on another column

You can use groupby with transform to calculate the mean of the desired columns, then join the result back to the initial DataFrame to add the newly created columns:

# Per-area mean of each product column, repeated for every row of its area
area_means = df.groupby('area')[['prod_a', 'prod_b']].transform('mean')
# Rename each mean column to 'mean <column> for the area'
area_means = area_means.rename(columns='mean {} for the area'.format)
# Attach the renamed mean columns to the original rows by index
df = df.join(area_means)

df:

entity	area	prod_a	prod_b	mean prod_a for the area	mean prod_b for the area
001	A	1	3	1.5	4.5
002	B	2	4	4	4.5
003	A	2	6	1.5	4.5
004	C	7	2	5.5	5
005	C	4	8	5.5	5
006	B	6	5	4	4.5

Creating a new column based on the mean of other values in group

Compute the means of all other values within each group using a double groupby:

sum all the values within the group
subtract the current (focal) value
divide by one less than the number of items in the group

Assign the shifted means to a new column:

def _mean_of_others(grp):
    """Mean of col3 over the other rows sharing the same col2 in one group."""
    # Total of col3 per col2 value, repeated on every row of that col2 value
    totals = grp.groupby("col2")["col3"].transform("sum")
    # Remove the row's own value, then divide by (distinct col1 values - 1)
    others = totals.sub(grp["col3"])
    return others.div(len(grp["col1"].unique()) - 1)


# droplevel(0) removes the "group" index level that apply() adds
means = df.groupby("group").apply(_mean_of_others).droplevel(0)

# Take the previous row's mean; rows whose col1 differs from the previous row
# (the first row of each col1 run) get 0 instead
same_col1_as_previous = df["col1"].eq(df["col1"].shift())
df["mean"] = means.shift().where(same_col1_as_previous, 0)

>>> df
   col1  col2  col3  group  mean
0     A  2015    10     10   0.0
1     A  2016    20     10   9.0
2     A  2017    25     10  10.5
3     B  2015    10     10   0.0
4     B  2016    12     10   9.0
5     B  2017    14     10  14.5
6     c  2015     8     10   0.0
7     c  2016     9     10  10.0
8     c  2017    10     10  16.0
9     d  2015    50     20   0.0
10    d  2016    60     20  40.0
11    d  2017    70     20  50.0
12    e  2015    40     20   0.0
13    e  2016    50     20  50.0
14    e  2017    60     20  60.0

Dataframe: adding a column with mean by other column group

Another alternative with pd.eval and transform with mean

# NOTE(review): presumably 'state' holds the strings 'True'/'False' and pd.eval
# turns them into booleans — confirm; the int cast makes the group mean the
# share of True values
state_flag = pd.eval(data['state']).astype(int)
data['av_state'] = (
    data.assign(state=state_flag)
        .groupby("group")['state']
        .transform('mean')
)

print(data)

  id group  state  value  av_state
0  1     1   True     11  0.666667
1  2     1  False     12  0.666667
2  3     2  False      5  0.500000
3  4     1   True      8  0.666667
4  5     2   True      3  0.500000

Create mean column for specific columns depending on group in R

library(tidyverse)

# Build the example data row by row: a group column and three value columns
tribble(
  ~group, ~first, ~second, ~third,
  0, 3, 2, 4,
  0, 0, NA, 5,
  0, 2, 7, 1,
  1, 3, 1, 6,
  1, 4, 0, NA,
  1, 2, 3, 3,
  0, 5, 5, 0,
  0, 6, 2, 2,
  1, NA, 1, 3
) |> 
  # Evaluate the mutate() below one row at a time
  rowwise() |> 
  # group 0 averages first and second, otherwise first and third are averaged;
  # na.rm = TRUE drops missing values before taking the mean
  mutate(mean = if_else(group == 0, mean(c_across(c(first, second)), na.rm = TRUE), 
                        mean(c_across(c(first, third)), na.rm = TRUE)))

#> # A tibble: 9 × 5
#> # Rowwise: 
#>   group first second third  mean
#>   <dbl> <dbl>  <dbl> <dbl> <dbl>
#> 1     0     3      2     4   2.5
#> 2     0     0     NA     5   0  
#> 3     0     2      7     1   4.5
#> 4     1     3      1     6   4.5
#> 5     1     4      0    NA   4  
#> 6     1     2      3     3   2.5
#> 7     0     5      5     0   5  
#> 8     0     6      2     2   4  
#> 9     1    NA      1     3   3

Created on 2022-06-08 by the reprex package (v2.0.1)

Add column with previous values by group

Use shift:

# Copy the FN value of the preceding row into a new column (first row gets NaN).
# NOTE(review): this shifts the whole FN column and ignores groups; for the
# previous value within each group, presumably something like
# df2.groupby('FN')['AuM'].shift() is intended — confirm against the question.
df2['PreviousValues'] = df2['FN'].shift()

output:


        Date       FN   AuM PreviousValues
0       01012021    A   10  NaN
1       01012021    B   20  A
2       02012021    A   12  B
3       02012021    B   23  A

Adding a column of means by group to original data

This is what the ave function is for.

# ave() returns a vector as long as Y where each element is the summary of its
# X group; FUN defaults to mean and is spelled out here for clarity
df1$Y.New <- ave(df1$Y, df1$X, FUN = mean)

Add column with average value grouped by column

You could mask the rate column in the dataframe, GroupBy the TYPE and transform with the mean, which will exclude NaNs. Then use fillna to replace the values in the masked dataframe:

# Turn zero rates into NaN so they are left out of the group means
ma = df['rate'].mask(df['rate'] == 0)
# Mean of the remaining rates per TYPE, aligned to every row; a TYPE whose
# rates are all masked has a NaN mean, which falls back to 0
type_mean = ma.groupby(df['TYPE']).transform('mean').fillna(0)
# Keep the original non-zero rates and fill the masked ones with the group mean
df['rate'] = ma.fillna(type_mean)

   ID  TYPE  rate
0   1    A   2.0
1   2    B   2.0
2   3    C   1.0
3   4    A   2.0
4   5    C   1.0
5   6    C   3.0
6   7    C   8.0
7   8    C   2.0
8   9    D   0.0

Create new column for mean by group in original dataframe in R

We can use mutate instead of summarise

library(dplyr)
# Add the per-unit_id mean of outcome to every row; mutate() keeps all rows,
# whereas summarise() would collapse each group to a single row
df <- df %>%
        group_by(unit_id) %>%
        mutate(mean = mean(outcome)) %>%
        # Drop the grouping so later verbs are not silently applied per unit_id
        ungroup()