How to Find the Largest N Elements in a List in R

How to find the largest N elements in a list in R?

order(R, decreasing=TRUE)[1:N]

Find the top n largest values from a dataframe (or matrix) in r

unlist and convert it into a vector, sort them and find top values. So for top 2 values we can do

tail(sort(unlist(df, use.names = FALSE)), 2)
#[1] 9.581705 9.591726

If it's a matrix you'll not require unlist

tail(sort(as.matrix(df)), 2)

data

set.seed(1233)
df = data.frame(a = runif(10,0,10),
                b = runif(10,1,10),
                c = runif(10,0,12))

How can I get top n values with its index in R?

We can use sort with index.return=TRUE to return the value with the index in a list. Then we can subset the list based on the first 3 unique elements in the 'x'.

lst <- sort(df1$distance, index.return=TRUE, decreasing=TRUE)
lapply(lst, `[`, lst$x %in% head(unique(lst$x),3))
#$x
#[1] 5 5 4 4 3

#$ix
#[1] 6 7 2 5 4

R - fastest way to get the indices of the max n elements in a vector

I want to add a hybrid approach using sort's partial argument and which:

whichpart <- function(x, n=30) {
  nx <- length(x)
  p <- nx-n
  xp <- sort(x, partial=p)[p]
  which(x > xp)
}

Some benchmarking:

library("microbenchmark")
library("data.table")
library("compiler")

set.seed(123)
x <- rnorm(1e6)
y <- sample.int(1e6)


whichpart <- function(x, n=30) {
  nx <- length(x)
  p <- nx-n
  xp <- sort(x, partial=p)[p]
  which(x > xp)
}

cpwhichpart <- cmpfun(whichpart)

# using quicksort
quicksort <- function(x, n=30) {
  sort(x, method="quick", decreasing=TRUE, index.return=TRUE)$ix[1:n]
}

cpquicksort <- cmpfun(quicksort)

# @Mariam
whichsort <- function(x, n=30) {
  which(x >= sort(x, decreasing=TRUE)[30], arr.ind=TRUE)
}

cpwhichsort <- cmpfun(whichsort)

# @Ferdinand.kraft
top <- function(x, n=30) {
    result <- numeric()
    for(i in 1:n){
        j <- which.max(x)
        result[i] <- j
        x[j] <- -Inf
    }
    result
}

cptop <- cmpfun(top)

# @Tony Breyal
dtable <- function(x, n=30) {
  dt <- data.table(x=x, x.index=seq.int(x))
  setkey(dt, "x")
  dt$x.index[1:n]
}

cpdtable <- cmpfun(dtable)

# @Roland
roland <- cmpfun(function(x, n=30) {
  y <- rep(-Inf, n)
  for (i in seq_along(x)) {
    if (x[i] > y[1]) {
      y[1] <- x[i]
      y <- y[order(y)]
    }
  }
  y
})

## rnorm
microbenchmark(whichpart(x), cpwhichpart(x),
               quicksort(x), cpquicksort(x),
               whichsort(x), cpwhichsort(x),
               top(x), cptop(x),
               dtable(x), cpdtable(x),
               roland(x), times=10)

# Unit: milliseconds
#            expr        min         lq     median         uq        max neval
#    whichpart(x)   45.63544   46.05638   47.09077   49.68452   51.42065    10
#  cpwhichpart(x)   45.65996   45.77212   47.02808   48.07482   82.20458    10
#    quicksort(x)  100.90936  103.00783  105.17506  109.31784  139.83518    10
#  cpquicksort(x)  100.53958  102.78017  107.64470  138.96630  142.52882    10
#    whichsort(x)  148.86010  151.04350  155.80871  159.47063  184.56697    10
#  cpwhichsort(x)  149.05578  150.21183  151.36918  166.58342  173.87567    10
#          top(x)  146.10757  182.42089  184.53050  191.37293  193.62272    10
#        cptop(x)  155.14354  179.14847  184.52323  196.80644  220.21222    10
#       dtable(x) 1041.32457 1042.54904 1049.26096 1065.40606 1080.89969    10
#     cpdtable(x) 1042.08247 1043.54915 1051.76366 1084.14360 1310.26485    10
#       roland(x)  251.42885  261.47608  273.20838  295.09733  323.96257    10

## integer
microbenchmark(whichpart(y), cpwhichpart(y),
               quicksort(y), cpquicksort(y),
               whichsort(y), cpwhichsort(y),
               top(y), cptop(y),
               dtable(y), cpdtable(y),
               roland(y), times=10)

# Unit: milliseconds
#            expr       min        lq    median        uq       max neval
#    whichpart(y)  11.60703  11.76857  12.03704  12.52871  47.88526    10
#  cpwhichpart(y)  11.62885  11.75006  12.53724  13.88563  46.93677    10
#    quicksort(y)  88.14924  89.47630  92.42414 103.53439 137.44335    10
#  cpquicksort(y)  88.11544  89.15334  92.63420  94.42244 133.78006    10
#    whichsort(y) 122.34675 123.13634 124.91990 127.79134 131.43400    10
#  cpwhichsort(y) 121.85618 122.91653 125.45211 127.14112 158.61535    10
#          top(y) 163.06669 181.19004 211.11557 224.19237 239.63139    10
#        cptop(y) 163.37903 173.55113 209.46770 218.59685 226.81545    10
#       dtable(y) 499.50807 505.45513 514.55338 537.84129 604.86454    10
#     cpdtable(y) 491.70016 498.62664 525.05342 527.14666 580.19429    10
#       roland(y) 235.44664 237.52200 242.87925 268.34080 287.71196    10


identical(sort(quicksort(x)), whichpart(x))
# [1] TRUE

Edit: test @flodel's suggestion

# @flodel
whichpartrev <- function(x, n=30) {
  which(x >= -sort(-x, partial=n)[n])
}

microbenchmark(whichpart(x), whichpartrev(x), times=100)

# Unit: milliseconds
#             expr      min       lq   median       uq      max neval
#     whichpart(x) 45.44940 46.15011 46.51321 48.67986 80.63286   100
#  whichpartrev(x) 28.84482 31.30661 32.87695 62.37843 67.84757   100

microbenchmark(whichpart(y), whichpartrev(y), times=100)

# Unit: milliseconds
#             expr      min       lq   median       uq      max neval
#     whichpart(y) 11.56135 12.26539 13.05729 13.75199 43.78484   100
#  whichpartrev(y) 16.00612 16.73690 17.71687 19.04153 49.02842   100

N Largest elements of the list

Here's another approach:

n_largest(L, N, R) :-
    msort(L, LS),
    length(R, N),
    append(_, R, LS).

This will count duplicates. So:

| ?- n_largest([1,4,2,6,3,4,6], 3, L).

L = [4,6,6]

yes

If you want to choose unique elements, then you can use sort:

n_largest(L, N, R) :-
    sort(L, LS),
    length(R, N),
    append(_, R, LS).

| ?- n_largest([1,4,2,6,3,4,6], 3, L).

L = [3,4,6]

yes

Note that these predicates provide the solutions in increasing order, not necessarily in the order that they occur in the original list. That was not stated as a requirement, however.

How to return 5 topmost values from vector in R?

> a <- c(1:100)
> tail(sort(a),5)
[1]  96  97  98  99 100

Make a table showing the 10 largest values of a variable in R?

This should do it...

data <- data[with(data,order(-Score)),]

data <- data[1:10,]

Fastest way to find second (third...) highest/lowest value in vector or column

Rfast has a function called nth_element that does exactly what you ask.

Further the methods discussed above that are based on partial sort, don't support finding the k smallest values

Update (28/FEB/21) package kit offers a faster implementation (topn) see https://stackoverflow.com/a/66367996/4729755, https://stackoverflow.com/a/53146559/4729755

Disclaimer: An issue appears to occur when dealing with integers which can by bypassed by using as.numeric (e.g. Rfast::nth(as.numeric(1:10), 2)), and will be addressed in the next update of Rfast.

Rfast::nth(x, 5, descending = T)

Will return the 5th largest element of x, while

Rfast::nth(x, 5, descending = F)

Will return the 5th smallest element of x

Benchmarks below against most popular answers.

For 10 thousand numbers:

N = 10000
x = rnorm(N)

maxN <- function(x, N=2){
    len <- length(x)
    if(N>len){
        warning('N greater than length(x).  Setting N=length(x)')
        N <- length(x)
    }
    sort(x,partial=len-N+1)[len-N+1]
}

microbenchmark::microbenchmark(
Rfast = Rfast::nth(x,5,descending = T),
maxn = maxN(x,5),
order = x[order(x, decreasing = T)[5]])

Unit: microseconds
  expr      min       lq      mean   median        uq       max neval
 Rfast  160.364  179.607  202.8024  194.575  210.1830   351.517   100
  maxN  396.419  423.360  559.2707  446.452  487.0775  4949.452   100
 order 1288.466 1343.417 1746.7627 1433.221 1500.7865 13768.148   100

For 1 million numbers:

N = 1e6
x = rnorm(N)

microbenchmark::microbenchmark(
Rfast = Rfast::nth(x,5,descending = T),
maxN = maxN(x,5),
order = x[order(x, decreasing = T)[5]]) 

Unit: milliseconds
  expr      min        lq      mean   median        uq       max neval
 Rfast  89.7722  93.63674  114.9893 104.6325  120.5767  204.8839   100
  maxN 150.2822 207.03922  235.3037 241.7604  259.7476  336.7051   100
 order 930.8924 968.54785 1005.5487 991.7995 1031.0290 1164.9129   100

How to Find the Largest N Elements in a List in R