How to Get Mean of Every N Rows and Keep the Date Index

How can I get mean of every n rows and keep the date index?

A short one-liner solution with data.table:

library(data.table)

# Grouping expression `year-0:1`: c(0, 1) is recycled along `year`, so every
# second row's year is shifted back by one and each pair of rows shares a key.
# NOTE(review): assumes one row per consecutive year, sorted, with an even
# number of rows - confirm against the actual data.
setDT(df)[,.(val=mean(val)), year-0:1]
#    year   val
# 1: 1990  75.5
# 2: 1992 105.0
# 3: 1994 105.5
# 4: 1996 164.5
# 5: 1998 164.5
# 6: 2000 119.5
# 7: 2002 213.5
# 8: 2004 223.0
# 9: 2006 164.5
#10: 2008 208.5
#11: 2010 157.0
#12: 2012 116.5

How to average column values every n rows in pandas

If I understand correctly, use DataFrame.melt to reshape the data and then compute the mean for each site with GroupBy.mean:

# df_tmp = df_tmp.astype(int) # get correct result
# Reshape to long format with 'site' as the id column, then average the
# melted 'value' column separately for each site.
df_tmp.melt('site').groupby('site')['value'].mean()

Or:

# df_tmp = df_tmp.astype(int) # get correct result
# Move 'site' into the index, stack the remaining columns into a single
# Series, then average per value of index level 0 (the site).
df_tmp.set_index('site').stack().groupby(level=0).mean()
#df_tmp.set_index('site').stack().mean(level=0) # .mean(level=0) deprecated

Output

site
1    3.333333
2    7.333333
Name: value, dtype: float64

Calculate average of every n rows from a csv file

You can use integer division by step for consecutive groups and pass to groupby for aggregate mean:

# Number of consecutive rows averaged together.
step = 30
# No index_col is passed, so the frame gets the default 0..n-1 RangeIndex.
m_df = pd.read_csv(m_path, usecols=['Col-01']) 
# Integer division of the index by step yields 0 for the first `step` rows,
# 1 for the next `step` rows, and so on; those values are the group keys.
df = m_df.groupby(m_df.index // step).mean()

Or:

# Build positional group ids (0, 0, ..., 1, 1, ...) from the row count, so the
# grouping does not depend on the index being the default RangeIndex.
df = m_df.groupby(np.arange(len(m_df)) // step).mean()

Sample data:

# Same grouping as above with a smaller group size: every 3 consecutive rows
# of m_df are averaged into one output row.
step = 3
df = m_df.groupby(m_df.index // step).mean()
print (df)
   H
0  3
1  1
2  2

Calculate the mean of every 13 rows in data frame

Here's a solution using aggregate() and rep().

# Toy data: 12 rows, so the row count is not a multiple of the group size.
df <- data.frame(a = 1:12, b = 13:24)
df
##     a  b
## 1   1 13
## 2   2 14
## 3   3 15
## 4   4 16
## 5   5 17
## 6   6 18
## 7   7 19
## 8   8 20
## 9   9 21
## 10 10 22
## 11 11 23
## 12 12 24
# Rows per group.
n <- 5
# Group ids 1, 1, 1, 1, 1, 2, ...: each id is repeated n times and `len`
# (partial match for length.out) trims the ids to exactly nrow(df) entries.
# The trailing [-1] drops the grouping column that aggregate() prepends.
aggregate(
  df,
  by = list(rep(seq_len(nrow(df) %/% n + 1), each = n, len = nrow(df))),
  FUN = mean
)[-1]
##      a    b
## 1  3.0 15.0
## 2  8.0 20.0
## 3 11.5 23.5

The important part of this solution that handles the issue of non-divisibility of nrow(df) by n is specifying the len parameter (actually the full parameter name is length.out) of rep(), which automatically caps the group vector to the appropriate length.

Python Pandas: Mean every n rows in a new column repeated n times

Use GroupBy.transform with integer division by index or helper array by length of DataFrame:

# Number of consecutive rows that share one mean.
nrow = 3

#if default RangeIndex
# transform('mean') returns a result aligned to the original rows, so each
# group's mean is repeated for every row of that group.
df['c3'] = df.groupby(df.index // nrow)['c2'].transform('mean')

#alternative if not default RangeIndex
#df['c3'] = df.groupby(np.arange(len(df)) // nrow)['c2'].transform('mean')
print(df)

  c1  c2  c3
0  A   1   2
1  B   2   2
2  C   3   2
3  D   3   4
4  E   4   4
5  F   5   4

Average and median of each n rows, for all columns of a dataframe, while keeping a datetime object as index

For this data:

> dput(df)
df <- structure(list(Year = c(2000L, 2000L, 2000L, 2000L, 2000L, 2000L
), Day = c(122L, 122L, 122L, 122L, 122L, 122L), Hour = c(0L, 
0L, 0L, 0L, 0L, 0L), Min = c(1L, 1L, 2L, 2L, 2L, 2L), Sec. = c(38.01, 
50.1, 2.19, 14.28, 26.38, 38.47), E1.S1 = c(3.31, 1.98, 1.98, 
2.65, 3.97, 2.65), E1.S2 = c(0.662, 3.31, 1.32, 1.32, 6.62, 0.662
), E1.S3 = c(0.662, 1.98, 3.97, 2.65, 0.662, 3.31), E1.S4 = c(2.65, 
1.98, 1.98, 3.31, 3.31, 1.98), E1.S5 = c(1.32, 1.98, 1.32, 2.65, 
3.31, 1.32), E1.S6 = c(0, 1.32, 0.662, 1.32, 4.63, 1.98), E1.S7 = c(3.31, 
4.63, 0.662, 3.97, 5.29, 1.98), E1.S8 = c(1.32, 1.32, 3.97, 2.65, 
1.98, 2.65), E2.S1 = c(1.98, 1.32, 1.32, 2.65, 0, 0.662), E2.S2 = c(1.98, 
0.662, 0.662, 0, 0, 1.32), E2.S3 = c(0.662, 0, 1.32, 0.662, 1.98, 
1.98), E2.S4 = c(0, 3.31, 0.662, 2.65, 0.662, 1.32)), .Names = c("Year", 
"Day", "Hour", "Min", "Sec.", "E1.S1", "E1.S2", "E1.S3", "E1.S4", 
"E1.S5", "E1.S6", "E1.S7", "E1.S8", "E2.S1", "E2.S2", "E2.S3", 
"E2.S4"), class = "data.frame", row.names = c("1", "2", "3", 
"4", "5", "6"))

This works:

# ceiling(row number / 5) labels rows 1-5 as group 1, rows 6-10 as group 2, ...;
# split() cuts df by that label and colMeans averages every column per piece.
lapply(split(df, ceiling(seq_len(nrow(df)) / 5)), colMeans)
# $`1`
#      Year       Day      Hour       Min      Sec.     E1.S1     E1.S2     E1.S3     E1.S4     E1.S5     E1.S6     E1.S7 
# 2000.0000  122.0000    0.0000    1.6000   26.1920    2.7780    2.6464    1.9848    2.6460    2.1160    1.5864    3.5724 
#     E1.S8     E2.S1     E2.S2     E2.S3     E2.S4 
#    2.2480    1.4540    0.6608    0.9248    1.4568 
# 
# $`2`
#     Year      Day     Hour      Min     Sec.    E1.S1    E1.S2    E1.S3    E1.S4    E1.S5    E1.S6    E1.S7    E1.S8 
# 2000.000  122.000    0.000    2.000   38.470    2.650    0.662    3.310    1.980    1.320    1.980    1.980    2.650 
#    E2.S1    E2.S2    E2.S3    E2.S4 
#    0.662    1.320    1.980    1.320 
#

Then you can just bind them:

# Stack the per-group named mean vectors into one matrix:
# one row per group, one column per variable.
do.call(rbind, lapply(split(df, ceiling(seq_len(nrow(df)) / 5)), colMeans))
#   Year Day Hour Min   Sec. E1.S1  E1.S2  E1.S3 E1.S4 E1.S5  E1.S6  E1.S7 E1.S8 E2.S1  E2.S2  E2.S3  E2.S4
# 1 2000 122    0 1.6 26.192 2.778 2.6464 1.9848 2.646 2.116 1.5864 3.5724 2.248 1.454 0.6608 0.9248 1.4568
# 2 2000 122    0 2.0 38.470 2.650 0.6620 3.3100 1.980 1.320 1.9800 1.9800 2.650 0.662 1.3200 1.9800 1.3200

Note: It also helps to check that all the columns you want to take the mean of are either integer or numeric, by doing:

> sapply(df, class)
#      Year       Day      Hour       Min      Sec.     E1.S1     E1.S2     E1.S3     E1.S4     E1.S5     E1.S6     E1.S7 
# "integer" "integer" "integer" "integer" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
#     E1.S8     E2.S1     E2.S2     E2.S3     E2.S4 
# "numeric" "numeric" "numeric" "numeric" "numeric"

Edit: Following OP's comment:

# Group id for every row of dd: rows 1-5 -> 1, rows 6-10 -> 2, ...
idx <- ceiling(seq_len(nrow(dd)) / 5)
# do colMeans on all columns except last one.
# NOTE(review): presumably the last column is "datetime" - confirm.
res <- lapply(split(dd[-(ncol(dd))], idx), colMeans, na.rm = TRUE)
# assign first value of "datetime" in each 5-er group as names to list;
# the positions are derived from nrow(dd), the data actually being split,
# so the number of names always matches the number of groups
names(res) <- dd$datetime[seq(1, nrow(dd), by = 5)]
# bind them to give a matrix
res <- do.call(rbind, res)

Alternatively, if you want a data.frame and datetime as a column:

# Group id for every row of dd: rows 1-5 -> 1, rows 6-10 -> 2, ...
idx <- ceiling(seq_len(nrow(dd)) / 5)
# Split every column except the last one into the 5-row groups.
res <- split(dd[-ncol(dd)], idx)
# Column means per group, ignoring missing values.
res <- lapply(res, colMeans, na.rm = TRUE)
# One row per group, converted from a matrix to a data.frame.
res <- as.data.frame(do.call(rbind, res))
# Label each group with the first "datetime" value it contains.
res$datetime <- dd$datetime[seq(1, nrow(dd), by = 5)]