How to Rank Within Groups in R

How to rank within groups in R?

You can do this pretty cleanly with dplyr

library(dplyr)
# Rank each customer's orders: highest order value first, with ties broken by
# the later order date. order(order(...)) turns a sort permutation into ranks.
df %>%
  group_by(customer_name) %>%
  mutate(
    my_ranks = order(order(order_values, order_dates, decreasing = TRUE))
  )

Source: local data frame [5 x 4]
Groups: customer_name

  customer_name order_dates order_values my_ranks
1          John  2010-11-01           15        3
2           Bob  2008-03-25           12        1
3          Alex  2009-11-15            5        1
4          John  2012-08-06           15        2
5          John  2015-05-07           20        1

Ranking within each group

dplyr way:

library(dplyr)
# Rank rows within each group by ascending y, breaking ties on z.
d %>%
  # Sort first so that a row's position inside its group is its rank.
  arrange(group, y, z) %>%
  group_by(group) %>%
  # row_number() is the safe form of 1:n(): for a zero-row group it yields
  # integer(0), whereas 1:0 would produce c(1, 0).
  mutate(rank = row_number()) %>%
  ungroup()

We first sort the data frame by group, then by y, and then by z. We then group it by group and number the rows within each group, so that each observation's rank is its position in that sorted order.

Result:

# A tibble: 12 × 5
       x         y          z group  rank
   <int>     <dbl>      <dbl> <int> <int>
1      1   0.00000   6.988904     1     1
2      1   0.00000 329.283431     1     2
3      1   0.00000 353.287515     1     3
4      0  35.51413   0.000000     1     4
5      0  47.87604   0.000000     1     5
6      0 272.62365   0.000000     1     6
7      1   0.00000 212.491666     2     1
8      1   0.00000 257.076377     2     2
9      1   0.00000 326.760675     2     3
10     1   0.00000 889.022577     2     4
11     0  48.02147   0.000000     2     5
12     0  84.97861   0.000000     2     6

Apply a rank across groups

You could try

library(dplyr)

# Give every row of a group the Value from that group's latest YEAR, then rank
# the groups on it: the largest value gets rank 1 and ranks have no gaps.
data %>%
  group_by(Grp) %>%
  mutate(Rank = Value[which.max(YEAR)]) %>%
  ungroup() %>%
  mutate(Rank = dense_rank(desc(Rank)))

#   YEAR Grp Value Rank
# 1 2020   A    25    3
# 2 2019   A    24    3
# 3 2020   B    35    2
# 4 2019   B    34    2
# 5 2020   C    45    1
# 6 2019   C    44    1

Rank variable by group (dplyr)

The following produces the desired result as was specified.

library(dplyr)

# Sort by sepal length within species, then rank the rows of each species.
# ties.method = "first" gives tied lengths distinct, consecutive ranks.
by_species <- iris %>%
  arrange(Species, Sepal.Length) %>%
  group_by(Species) %>%
  mutate(rank = rank(Sepal.Length, ties.method = "first"))

# Keep the three lowest-ranked rows of each species.
filter(by_species, rank <= 3)
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
##  Sepal.Length Sepal.Width Petal.Length Petal.Width    Species  rank
##         (dbl)       (dbl)        (dbl)       (dbl)     (fctr) (int)
##1          4.3         3.0          1.1         0.1     setosa     1
##2          4.4         2.9          1.4         0.2     setosa     2
##3          4.4         3.0          1.3         0.2     setosa     3
##4          4.9         2.4          3.3         1.0 versicolor     1
##5          5.0         2.0          3.5         1.0 versicolor     2
##6          5.0         2.3          3.3         1.0 versicolor     3
##7          4.9         2.5          4.5         1.7  virginica     1
##8          5.6         2.8          4.9         2.0  virginica     2
##9          5.7         2.5          5.0         2.0  virginica     3

# Take the first three rows of each species; by_species is sorted by
# Sepal.Length within species, so these are the three lowest-ranked rows.
slice(by_species, 1:3)
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
##  Sepal.Length Sepal.Width Petal.Length Petal.Width    Species  rank
##         (dbl)       (dbl)        (dbl)       (dbl)     (fctr) (int)
##1          4.3         3.0          1.1         0.1     setosa     1
##2          4.4         2.9          1.4         0.2     setosa     2
##3          4.4         3.0          1.3         0.2     setosa     3
##4          4.9         2.4          3.3         1.0 versicolor     1
##5          5.0         2.0          3.5         1.0 versicolor     2
##6          5.0         2.3          3.3         1.0 versicolor     3
##7          4.9         2.5          4.5         1.7  virginica     1
##8          5.6         2.8          4.9         2.0  virginica     2
##9          5.7         2.5          5.0         2.0  virginica     3

R data frame rank by groups (group by rank) with package dplyr

I had a similar issue. My solution was to sort by group and by the variable(s) to be ranked, and then use row_number() after group_by().

# Sample dataset: 20 rows alternating between two groups, with integer values
# drawn from a normal distribution. No seed is set, so values differ per run.

# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(dplyr)

df <- data.frame(
  group = rep(c("GROUP 1", "GROUP 2"), 10),
  value = as.integer(rnorm(20, mean = 1000, sd = 500))
)

# Show the first 10 rows. R indexing is 1-based: the 0 in df[0:10, ] was
# silently dropped, so head() states the intent directly.
print.data.frame(head(df, 10))
   group value
1  GROUP 1  1273
2  GROUP 2  1261
3  GROUP 1  1189
4  GROUP 2  1390
5  GROUP 1  1942
6  GROUP 2  1111
7  GROUP 1   530
8  GROUP 2   893
9  GROUP 1   997
10 GROUP 2   237

# Order each group from highest to lowest value, then number the rows of each
# group in that order to obtain the rank.
sorted <- df %>%
  arrange(group, desc(value)) %>%
  group_by(group) %>%
  mutate(rank = row_number())
print.data.frame(sorted)

      group value rank
1  GROUP 1  1942    1
2  GROUP 1  1368    2
3  GROUP 1  1273    3
4  GROUP 1  1249    4
5  GROUP 1  1189    5
6  GROUP 1   997    6
7  GROUP 1   562    7
8  GROUP 1   535    8
9  GROUP 1   530    9
10 GROUP 1     1   10
11 GROUP 2  1472    1
12 GROUP 2  1390    2
13 GROUP 2  1281    3
14 GROUP 2  1261    4
15 GROUP 2  1111    5
16 GROUP 2   893    6
17 GROUP 2   774    7
18 GROUP 2   669    8
19 GROUP 2   631    9
20 GROUP 2   237   10

Rank within groups in R with special NA handling

We can group by 'B' and rank on 'C', using a logical condition in i to select only the non-NA elements of 'C', and assign (:=) the rank values to create the 'RANK' column. By default, the rows that are not selected (i.e. those where 'C' is NA) will be NA in the new column.

library(data.table)
# Convert df to a data.table by reference.
setDT(df)
# Rank C in descending order within each B group, using only rows where C is
# not NA; the rows left out keep NA in RANK.
df[!is.na(C), RANK := rank(-C), by = B]
df
#    A  B  C RANK
# 1: A V1  1  4.0
# 2: A V2  2  3.5
# 3: A V3  3  3.0
# 4: B V1  5  1.0
# 5: B V2  2  3.5
# 6: B V3 NA   NA
# 7: C V1  4  2.0
# 8: C V2  6  2.0
# 9: C V3  7  2.0
#10: D V1  3  3.0
#11: D V2  7  1.0
#12: D V3  8  1.0

Rank within group in for loop in R

Not sure if this is exactly what you wanted but with the data.frame below this worked for me. Hope it helps

# Example data: two categories of 10 rows each and two score columns with
# missing values scattered through them.
df <- data.frame(
  Category = c(rep("Orange", 10), rep("Banana", 10)),
  Score.08.2007 = c(runif(6), rep(NA, 4), runif(4), rep(NA, 2), runif(4)),
  Score.09.2017 = c(
    runif(5), rep(NA, 3), runif(2), runif(4), rep(NA, 4), runif(2)
  ),
  # Spell out FALSE: F is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)

dplyr solution

library(dplyr)

#' Append within-Category ranks for a set of score columns
#'
#' @param theseCols Columns to rank, given as a tidyselect expression (bare
#'   column names, or a character vector, ideally wrapped in `all_of()`).
#' @param newCols Character vector of names for the rank columns, one per
#'   selected column and in the same order as `theseCols`.
#' @param df Data frame containing a `Category` column and the score columns.
#' @return `df` with the rank columns bound on the right. Missing scores keep
#'   an `NA` rank.
eq_ranks <- function(theseCols, newCols, df) {
  ranked <- df %>%
    group_by(Category) %>%
    # across() replaces the superseded mutate_at() / deprecated funs() pair.
    mutate(across({{ theseCols }}, ~ rank(.x, na.last = "keep"))) %>%
    ungroup() %>%
    # Keep exactly the ranked columns, in selection order, so that newCols
    # lines up with them even when df carries other columns.
    select({{ theseCols }}) %>%
    setNames(newCols)
  cbind(df, ranked)
}

# Rank every column except the first one (Category). Each rank column takes
# the name of its score column with "Score" replaced by "Rank".
aux <- names(df)[-1]
newCols <- sub("Score", "Rank", aux, fixed = TRUE)
eq_ranks(aux, newCols, df)

Output

structure(list(Category = c("Orange", "Orange", "Orange", "Orange", 
"Orange", "Orange", "Orange", "Orange", "Orange", "Orange", "Banana", 
"Banana", "Banana", "Banana", "Banana", "Banana", "Banana", "Banana", 
"Banana", "Banana"), Score.08.2007 = c(0.757087148027495, 0.202692255144939, 

0.711121222469956, 0.121691921027377, 0.245488513959572, 0.14330437942408, 
NA, NA, NA, NA, 0.239629415096715, 0.0589343772735447, 0.642288258532062, 
0.876269212691113, NA, NA, 0.778914677444845, 0.79730882588774, 
0.455274453619495, 0.410084082046524), Score.09.2017 = c(0.810870242770761, 
0.604933290276676, 0.654723928077146, 0.353197271935642, 0.270260145887733, 
NA, NA, NA, 0.99268406117335, 0.633493264438584, 0.213208135217428, 
0.129372348077595, 0.478118034312502, 0.924074469832703, NA, 
NA, NA, NA, 0.59876096714288, 0.976170694921166), Rank.08.2007 = c(6, 
3, 5, 1, 4, 2, NA, NA, NA, NA, 2, 1, 5, 8, NA, NA, 6, 7, 4, 3
), Rank.09.2017 = c(6, 3, 5, 2, 1, NA, NA, NA, 7, 4, 2, 1, 3, 
5, NA, NA, NA, NA, 4, 6)), .Names = c("Category", "Score.08.2007", 
"Score.09.2017", "Rank.08.2007", "Rank.09.2017"), row.names = c(NA, 
-20L), class = "data.frame")

Get ranking within each group for r dataframe

Looks like people forgot about your question. Hope this doesn't come too late ^^

library(dplyr)

# Within each gene, rank samples from highest to lowest expression; tied
# values share a rank and no rank numbers are skipped.
df %>%
  group_by(Gene) %>%
  mutate(Rank = dense_rank(desc(Expression)))

> df
# A tibble: 7 x 4
# Groups:   Gene [2]
  Gene            Expression Sample  Rank
  <chr>                <dbl> <chr>  <dbl>
1 ENSG00000000003      2.82  HSB671     1
2 ENSG00000000003      2.79  HSB431     2
3 ENSG00000000003      2.40  HSB618     3
4 ENSG00000000938      1.75  HSB671     1
5 ENSG00000000938      1.52  HSB670     2
6 ENSG00000000938      0.835 HSB414     3
7 ENSG00000000938      0.622 HSB459     4

Or with base R:

# ave() applies rank() to the negated expression values within each gene, so
# the highest expression gets rank 1 (tied values receive the average rank).
df$Rank <- with(df, ave(-Expression, Gene, FUN = rank))

How to Rank Within Groups in R