Max and Min Functions That Are Similar to Colmeans

max and min functions that are similar to colMeans

pmax is ~ 10x faster than apply. Still not as fast as colMeans though.

# Benchmark fixture: a 100 x 10000 matrix of standard-normal draws.
data <- matrix(rnorm(10^6), 100)
# pmax() works element-wise across its arguments, so the matrix is transposed
# first: every row of `data` becomes one data-frame column, and
# do.call(pmax, data.df) then yields one maximum per original column.
data.df <- data.frame(t(data))

# Time the three approaches: apply() with max, pmax() over the transposed
# data frame, and colMeans() as the speed reference.
system.time(apply(data, MARGIN=c(2), max))
system.time(do.call(pmax, data.df))
system.time(colMeans(data))

> system.time(apply(data, MARGIN=c(2), max))
   user  system elapsed 
  0.133   0.006   0.139 
> system.time(do.call(pmax, data.df))
   user  system elapsed 
  0.013   0.000   0.013 
> system.time(colMeans(data))
   user  system elapsed 
  0.003   0.000   0.002

Using by() for min & max for multiple factors in R

I think the "doBy" package might be useful here. It summarizes the data by groups and returns a data.frame object which will allow you to do any further manipulation. Try this:

# Install doBy only when it is missing, so re-running this script does not
# download and reinstall the package every time.
if (!requireNamespace("doBy", quietly = TRUE)) {
  install.packages("doBy")
}
library(doBy)

# Per-Species min, max and mean of the four iris measurements.
# Left of `~`: the variables to summarise; right of `~`: the grouping variable.
df <- summaryBy(
  Sepal.Length + Sepal.Width + Petal.Length + Petal.Width ~ Species,
  data = iris,
  # Returns a named vector holding the three statistics for one variable.
  FUN = function(x) {
    c(min = min(x), max = max(x), mean = mean(x))
  }
)

Variables to the left of the "~" are the ones you want to summarize, while variables to the right of the "~" are the ones you want to group by. So the call above summarizes 1. Sepal.Length, 2. Sepal.Width, 3. Petal.Length and 4. Petal.Width by Species.

You can add more summary statistics in the function(x) argument as well.

Column-wise max in R

You can write your own c++ function using Rcpp:

#include <Rcpp.h> 
using namespace Rcpp;
// Column-wise maximum of a numeric matrix.
// X: numeric matrix. Returns a numeric vector with one entry per column of X,
// each entry being the maximum of that column.
// [[Rcpp::export]]
NumericVector colMaxRcpp(NumericMatrix X) {
    const int n_cols = X.ncol();
    NumericVector col_max(n_cols);
    // X(_, j) is a view of column j; Rcpp::max reduces it to a scalar.
    for (int j = 0; j < n_cols; ++j) {
        col_max[j] = Rcpp::max(X(_, j));
    }
    return col_max;
}

Here some benchmarking:

# Benchmark input: 1e6 standard-normal draws laid out in 10000 columns.
A <- matrix(rnorm(1e6),ncol=10000)
# Baseline implementation: column-wise maximum of matrix `A` via apply() over
# the column margin.
apply.max <- function(A) {
  apply(A, MARGIN = 2, FUN = max)
}
# Check that the Rcpp implementation and the apply() baseline agree exactly.
identical(colMaxRcpp(A),apply.max(A))
[1] TRUE

library(microbenchmark)
# times=1 evaluates each expression once, so every summary column of the
# result shows the same single timing.
microbenchmark(colMaxRcpp(A),apply.max(A),times=1)
Unit: milliseconds
          expr      min       lq   median       uq      max neval
 colMaxRcpp(A) 11.57765 11.57765 11.57765 11.57765 11.57765     1
  apply.max(A) 79.66040 79.66040 79.66040 79.66040 79.66040     1

EDIT: added a benchmark for a 30*30 matrix. Rcpp is roughly 12 times faster.

# Small benchmark input: a 30 x 30 matrix of standard-normal draws.
A <- matrix(rnorm(30*30),ncol=30)
Unit: microseconds
          expr     min      lq   median      uq      max neval
 colMaxRcpp(A)  13.274  14.033  15.1715  18.584   32.238    10
  apply.max(A) 162.702 166.495 174.0805 189.251 1310.716    10

R: How to take the min and max or other functions of every n rows

You can use sapply and split for this:

# Assign each row to a block of 5 consecutive rows. ceiling(row / 5) also
# copes with a row count that is not a multiple of 5 (the last block is simply
# shorter), whereas rep(1:(nrow(df)/5), each=5) produces an index of the wrong
# length in that case and split() then recycles it, mis-grouping rows.
block_id <- ceiling(seq_len(nrow(df)) / 5)
value_blocks <- split(df$value, block_id)

# One statistic per block; the result names are the block numbers.
sapply(value_blocks, mean)
sapply(value_blocks, min)
sapply(value_blocks, max)

If you want the outputs in a matrix you can use what @lmo proposed in the comments:

# Mean, min and max for every block of 5 consecutive rows, returned as a
# matrix with one column per block. ceiling(row / 5) keeps the grouping index
# the same length as the data even when nrow(df) is not a multiple of 5.
sapply(split(df$value, ceiling(seq_len(nrow(df)) / 5)),
       function(x) c(mean = mean(x), min = min(x), max = max(x)))

Update

How to get the statistic and p-value from a one-sample t-test for every n rows: this is a bit harder to implement. See below:

#mu=3 for sample t-test
# Run t.test(mu = 3) on each block of 5 consecutive values of df$value.
# NOTE(review): the grouping index assumes nrow(df) is a multiple of 5 — confirm.
# NOTE(review): sapply() simplifies the per-block results, so t_test_list is
# presumably a list-matrix with one column per block, not a plain list.
t_test_list <- sapply(split(df$value, rep(1:(nrow(df)/5), each=5)), t.test, mu=3) 

# Convert to a data frame so lapply() visits one block (column) at a time,
# then pull the p-value and the test statistic out of each block's result.
p_value_list <- lapply(as.data.frame(t_test_list),function(x) x$p.value)
statistic_list <- lapply(as.data.frame(t_test_list),function(x) x$statistic)

p_value_list and statistic_list hold the p-value and the test statistic for each group of 5 rows.

Calcing Rolling Min/Max Values on a time series

You can try the following :

library(zoo)
library(dplyr)

# Rolling 10-row windows that start at the current row (align = "left").
# partial = TRUE lets windows near the end of the data use only the rows that
# are still available.
df %>%
  mutate(
    # Per-column rolling minimum -> High_min, Low_min, Close_min.
    across(
      High:Close,
      ~ rollapply(.x, width = 10, FUN = min, align = "left", partial = TRUE),
      .names = "{col}_min"
    ),
    # Row-wise minimum over the three rolling minima.
    rolling_min = pmin(High_min, Low_min, Close_min),
    # Per-column rolling maximum -> High_max, Low_max, Close_max.
    across(
      High:Close,
      ~ rollapply(.x, width = 10, FUN = max, align = "left", partial = TRUE),
      .names = "{col}_max"
    ),
    # Row-wise maximum over the three rolling maxima.
    rolling_max = pmax(High_max, Low_max, Close_max)
  ) %>%
  # Keep the original columns plus the two combined results.
  select(Date:Close, rolling_min, rolling_max)

This returns :

#         Date   High    Low  Close rolling_min rolling_max
#1  12/16/2020 371.16 368.87 370.17      363.26      371.16
#2  12/15/2020 369.59 365.92 369.59      363.26      371.05
#3  12/14/2020 369.80 364.47 364.66      363.26      371.05
#4  12/11/2020 366.74 363.26 366.30      359.17      371.05
#5  12/10/2020 367.86 364.43 366.73      359.17      371.05
#6   12/9/2020 371.05 365.95 366.85      359.17      371.05
#7   12/8/2020 370.78 367.67 370.17      359.17      370.78
#8   12/7/2020 369.62 367.72 369.09      354.87      369.85
#9   12/4/2020 369.85 367.22 369.85      354.87      369.85
#10  12/3/2020 368.19 365.50 366.69      354.15      368.19
#11  12/2/2020 366.96 364.20 366.79      354.15      367.68
#12  12/1/2020 367.68 364.93 366.02      354.15      367.68
#13 11/30/2020 363.12 359.17 362.06      354.15      364.18
#14 11/27/2020 364.18 362.58 363.67      354.15      364.18
#15 11/25/2020 363.16 361.48 362.66      354.15      363.81
#16 11/24/2020 363.81 359.29 363.22      354.15      363.81
#17 11/23/2020 358.82 354.87 357.46      354.15      361.50
#18 11/20/2020 357.72 355.25 355.33      354.15      361.50
#19 11/19/2020 358.18 354.15 357.78      354.15      361.50
#20 11/18/2020 361.50 356.24 356.28      356.24      361.50

For the 3 columns we calculate rolling min and max and then using pmin and pmax we gather one min and max for each row.

Extract min and max information by sequential similar parts of data frame in R

We need a run-length encoding to track consecutive Soil.

Using this function (fashioned to mimic data.table::rleid):

# Run-length id: consecutive equal values of `x` share one integer id, and the
# id increases by one at every change of value.
myrleid <- function(x) {
  run_lengths <- rle(x)$lengths
  rep(seq_along(run_lengths), times = run_lengths)
}

We can do

# Group rows by run of consecutive identical Soil values, then report the
# Soil label and the min/max of Distance and Elevation for each run.
df %>%
  group_by(grp = myrleid(Soil)) %>%
  summarize(
    Soil = Soil[1L],
    across(
      c(Distance, Elevation),
      list(min = min, max = max)
    )
  ) %>%
  # The run id was only needed for grouping.
  select(-grp)
# # A tibble: 5 x 5
#   Soil   Distance_min Distance_max Elevation_min Elevation_max
#   <chr>         <dbl>        <dbl>         <dbl>         <dbl>
# 1 Forest            1            5          1499          1500
# 2 Grass            10           56          1456          1470
# 3 Scrub            59           99          1435          1450
# 4 Grass           102          139          1400          1430
# 5 Forest          143          230          1370          1390

How to calculate mean , min, and max across when grouping using dplyr?

You can try something like this:

   library(dplyr)
   # Per-ID summary statistics. Within each ID group, c_across(A:C) pools the
   # values of columns A, B and C into one vector before summarising.
   # TRUE is spelled out because `T` is an ordinary variable that can be
   # reassigned.
   df %>%
     group_by(ID) %>%
     summarise(mean_ = mean(c_across(A:C), na.rm = TRUE),
               medi_ = median(c_across(A:C), na.rm = TRUE),
               max_  = max(c_across(A:C), na.rm = TRUE),
               min_  = min(c_across(A:C), na.rm = TRUE))
    
    `summarise()` ungrouping output (override with `.groups` argument)
    # A tibble: 3 x 5
         ID mean_ medi_  max_  min_
      <int> <dbl> <dbl> <int> <int>
    1     1  3      3       6     0
    2     2  3.5    3       9     0
    3     3  2.33   2.5     5     0

For the second part:

# Row-wise summary statistics: rowwise() makes every row its own group, so
# c_across(A:C) collects that row's A, B and C values.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
df %>%
   rowwise() %>%
   summarise(mean_ = mean(c_across(A:C), na.rm = TRUE),
             medi_ = median(c_across(A:C), na.rm = TRUE),
             max_  = max(c_across(A:C), na.rm = TRUE),
             min_  = min(c_across(A:C), na.rm = TRUE))

`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 6 x 4
  mean_ medi_  max_  min_
  <dbl> <int> <int> <int>
1  2        1     5     0
2  2        3     3     0
3  1        1     2     0
4  5        5     9     1
5  3.67     3     5     3
6  4        4     6     2

With data:

# Sample data: six rows with an integer ID and three integer measurement
# columns A, B and C.
df <- data.frame(
  ID = c(1L, 2L, 3L, 2L, 3L, 1L),
  A  = c(1L, 3L, 0L, 5L, 3L, 2L),
  B  = c(5L, 0L, 2L, 9L, 5L, 6L),
  C  = c(0L, 3L, 1L, 1L, 3L, 4L)
)

Calculate min, maximum and mean in R

If we are looking for functions that find the max and min of each column of a matrix, colMaxs and colMins from matrixStats can be used.

library(matrixStats)
# Column-wise maxima of the example matrix `mat`.
colMaxs(mat)
#[1]  7  8 20

# Column-wise minima of the same matrix.
colMins(mat)
#[1] 3 1 7

But, if this is to find for every 5 rows of dataset columns, use gl to create the grouping index for each 5 rows, and then with the help of by we get the colMaxs or colMins or colMeans

# Column maxima within each block of 5 consecutive rows of `data`.
# gl() builds the block index; as.numeric() turns it into block numbers.
by(
  data = data,
  INDICES = list(gr = as.numeric(gl(nrow(data), 5, nrow(data)))),
  FUN = function(x) colMaxs(as.matrix(x))
)

The same way, we can find the colMins or colMeans

# Column minima within each block of 5 consecutive rows of `data`.
by(
  data = data,
  INDICES = list(gr = as.numeric(gl(nrow(data), 5, nrow(data)))),
  FUN = function(x) colMins(as.matrix(x))
)

# Column means within each block of 5 consecutive rows of `data`.
by(
  data = data,
  INDICES = list(gr = as.numeric(gl(nrow(data), 5, nrow(data)))),
  FUN = function(x) colMeans(as.matrix(x))
)

The above can be done in a compact way with dplyr

 library(dplyr)
 # Min, max and mean of every column for each block of 5 consecutive rows.
 # across() replaces the deprecated summarise_each()/funs() pair; result
 # columns are named <column>_<statistic>. The final select() keeps the
 # statistic-major column order (all mins, then maxes, then means).
 data %>%
    group_by(gr = as.numeric(gl(nrow(.), 5, nrow(.)))) %>%
    summarise(across(everything(), list(min = min, max = max, mean = mean))) %>%
    select(gr, ends_with("_min"), ends_with("_max"), ends_with("_mean"))

To do the plotting, maybe we can extend this with ggplot:

library(ggplot2)
library(tidyr)
# Summarise every column per block of 5 rows, reshape to long format and plot
# one facet per original column, with bars filled by statistic.
# across() replaces the deprecated summarise_each()/funs(); pivot_longer()
# with names_sep replaces the superseded gather() + separate() pair by
# splitting names such as "Pb_min" into Var1 = "Pb" and Var2 = "min".
data %>%
    group_by(gr = as.numeric(gl(nrow(.), 5, nrow(.)))) %>%
    summarise(across(everything(), list(min = min, max = max, mean = mean))) %>%
    pivot_longer(-gr, names_to = c("Var1", "Var2"), names_sep = "_",
                 values_to = "Val") %>%
    ggplot(aes(x = factor(gr), y = Val, fill = Var2)) +
           geom_bar(stat = "identity") +
           facet_wrap(~Var1)

data

# 4 x 3 example matrix, filled row by row. TRUE is spelled out because `T` is
# an ordinary variable that can be reassigned.
mat <- matrix(c(3, 1, 20, 5, 4, 12, 6, 2, 9, 7, 8, 7), byrow = TRUE, ncol = 3)
# Fix the seed so the simulated data frame is reproducible.
set.seed(24)
# 42 rows: Pb sampled from 1..9 with replacement, Ps standard-normal.
data <- data.frame(Pb = sample(1:9, 42, replace = TRUE), Ps = rnorm(42))

How to find the highest value of a column in a data frame in R?

Similar to colMeans, colSums, etc, you could write a column maximum function, colMax, and a column sort function, colSort.

# Column maximum for every column of `data`, ignoring NA values.
colMax <- function(data) {
  sapply(data, FUN = max, na.rm = TRUE)
}
# Sort every column of `data` independently; arguments in `...` (for example
# decreasing = TRUE) are forwarded through sapply() to sort().
colSort <- function(data, ...) {
  sapply(data, FUN = sort, ...)
}

I use ... in the second function in hopes of sparking your intrigue.

Get your data:

# Sample data parsed from inline text. `header = TRUE` is written out in full:
# the original `h=T` relied on partial argument matching and on `T`, which is
# an ordinary variable that can be reassigned. The data rows carry one more
# field than the header, so the first field of each row becomes the row name.
dat <- read.table(header = TRUE, text = "Ozone Solar.R Wind Temp Month Day
1     41     190  7.4   67     5   1
2     36     118  8.0   72     5   2
3     12     149 12.6   74     5   3
4     18     313 11.5   62     5   4
5     NA      NA 14.3   56     5   5
6     28      NA 14.9   66     5   6
7     23     299  8.6   65     5   7
8     19      99 13.8   59     5   8
9      8      19 20.1   61     5   9")

Use colMax function on sample data:

colMax(dat)
#  Ozone Solar.R    Wind    Temp   Month     Day 
#   41.0   313.0    20.1    74.0     5.0     9.0

To do the sorting on a single column,

sort(dat$Solar.R, decreasing = TRUE)
# [1] 313 299 190 149 118  99  19

and over all columns use our colSort function,

colSort(dat, decreasing = TRUE) ## compare with '...' above

Max and Min Functions That Are Similar to Colmeans