How to Rank Within Groups in R

How to rank within groups in R?

You can do this pretty cleanly with dplyr

library(dplyr)

# Rank each customer's orders from the largest order value to the smallest,
# breaking ties by the more recent order date. The inner order() yields the
# sort permutation; applying order() to that permutation turns it into the
# rank of every row.
df %>%
  group_by(customer_name) %>%
  mutate(
    my_ranks = order(order(order_values, order_dates, decreasing = TRUE))
  )

Source: local data frame [5 x 4]
Groups: customer_name

customer_name order_dates order_values my_ranks
1 John 2010-11-01 15 3
2 Bob 2008-03-25 12 1
3 Alex 2009-11-15 5 1
4 John 2012-08-06 15 2
5 John 2015-05-07 20 1

Ranking within each group

dplyr way:

library(dplyr)

# Sort so that, inside every group, rows appear in ascending y (ties broken
# by z); the position of a row within its group is then its rank.
d %>%
  arrange(group, y, z) %>%
  group_by(group) %>%
  # row_number() instead of 1:n(): 1:n() evaluates to c(1, 0) when there are
  # no rows, which is the wrong length for an empty input.
  mutate(rank = row_number()) %>%
  ungroup()

We first sort the data frame by group, then by y, and then by z. We then group it by group and number the rows within each group, which gives each observation its rank.

Result:

# A tibble: 12 × 5
x y z group rank
<int> <dbl> <dbl> <int> <int>
1 1 0.00000 6.988904 1 1
2 1 0.00000 329.283431 1 2
3 1 0.00000 353.287515 1 3
4 0 35.51413 0.000000 1 4
5 0 47.87604 0.000000 1 5
6 0 272.62365 0.000000 1 6
7 1 0.00000 212.491666 2 1
8 1 0.00000 257.076377 2 2
9 1 0.00000 326.760675 2 3
10 1 0.00000 889.022577 2 4
11 0 48.02147 0.000000 2 5
12 0 84.97861 0.000000 2 6

Apply a rank across groups

You could try

library(dplyr)

data %>%
  group_by(Grp) %>%
  # Within each group, take the Value from the row with the latest YEAR and
  # repeat it on every row of that group.
  mutate(Rank = Value[which.max(YEAR)]) %>%
  ungroup() %>%
  # Rank the groups against each other: the largest latest-year value gets 1,
  # and dense_rank() leaves no gaps between consecutive ranks.
  mutate(Rank = dense_rank(desc(Rank)))

# YEAR Grp Value Rank
# 1 2020 A 25 3
# 2 2019 A 24 3
# 3 2020 B 35 2
# 4 2019 B 34 2
# 5 2020 C 45 1
# 6 2019 C 44 1

Rank variable by group (dplyr)

The following produces the desired result as was specified.

library(dplyr)

# Rank Sepal.Length within each species; ties.method = "first" gives tied
# values consecutive ranks in their order of appearance.
by_species <- iris %>%
  arrange(Species, Sepal.Length) %>%
  group_by(Species) %>%
  mutate(rank = rank(Sepal.Length, ties.method = "first"))

# Keep the three lowest-ranked observations of every species.
filter(by_species, rank <= 3)
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species rank
## (dbl) (dbl) (dbl) (dbl) (fctr) (int)
##1 4.3 3.0 1.1 0.1 setosa 1
##2 4.4 2.9 1.4 0.2 setosa 2
##3 4.4 3.0 1.3 0.2 setosa 3
##4 4.9 2.4 3.3 1.0 versicolor 1
##5 5.0 2.0 3.5 1.0 versicolor 2
##6 5.0 2.3 3.3 1.0 versicolor 3
##7 4.9 2.5 4.5 1.7 virginica 1
##8 5.6 2.8 4.9 2.0 virginica 2
##9 5.7 2.5 5.0 2.0 virginica 3

# by_species is grouped by Species, so slice() takes the first three rows of
# every species rather than of the whole table.
by_species %>%
  slice(seq_len(3))
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species rank
## (dbl) (dbl) (dbl) (dbl) (fctr) (int)
##1 4.3 3.0 1.1 0.1 setosa 1
##2 4.4 2.9 1.4 0.2 setosa 2
##3 4.4 3.0 1.3 0.2 setosa 3
##4 4.9 2.4 3.3 1.0 versicolor 1
##5 5.0 2.0 3.5 1.0 versicolor 2
##6 5.0 2.3 3.3 1.0 versicolor 3
##7 4.9 2.5 4.5 1.7 virginica 1
##8 5.6 2.8 4.9 2.0 virginica 2
##9 5.7 2.5 5.0 2.0 virginica 3

R data frame rank by groups (group by rank) with package dplyr

I had a similar issue. My solution was to sort by the group and by the variable(s) to be ranked, and then to call row_number() after group_by().

# Sample dataset: 20 rows alternating between two groups, with random
# integer values (no seed is set, so the values differ from run to run).
df <- data.frame(
  group = rep(c("GROUP 1", "GROUP 2"), 10),
  value = as.integer(rnorm(20, mean = 1000, sd = 500))
)
# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the script fail later at the first dplyr call.
library(dplyr)
# Show the first ten rows. head() replaces df[0:10, ], whose index 0 is
# silently dropped, and print() dispatches to the data.frame method itself.
print(head(df, 10))
group value
1 GROUP 1 1273
2 GROUP 2 1261
3 GROUP 1 1189
4 GROUP 2 1390
5 GROUP 1 1942
6 GROUP 2 1111
7 GROUP 1 530
8 GROUP 2 893
9 GROUP 1 997
10 GROUP 2 237

# Sort by group and by descending value, then number the rows inside each
# group: the largest value in a group gets rank 1.
sorted <- df %>%
  arrange(group, desc(value)) %>%
  group_by(group) %>%
  mutate(rank = row_number()) %>%
  # Drop the grouping so later operations on `sorted` are not silently
  # applied per group.
  ungroup()
# Print in base data.frame format rather than as a tibble.
print(as.data.frame(sorted))

group value rank
1 GROUP 1 1942 1
2 GROUP 1 1368 2
3 GROUP 1 1273 3
4 GROUP 1 1249 4
5 GROUP 1 1189 5
6 GROUP 1 997 6
7 GROUP 1 562 7
8 GROUP 1 535 8
9 GROUP 1 530 9
10 GROUP 1 1 10
11 GROUP 2 1472 1
12 GROUP 2 1390 2
13 GROUP 2 1281 3
14 GROUP 2 1261 4
15 GROUP 2 1111 5
16 GROUP 2 893 6
17 GROUP 2 774 7
18 GROUP 2 669 8
19 GROUP 2 631 9
20 GROUP 2 237 10

Rank within groups in R with special NA handling

We can group by 'B' and rank on 'C', using a logical condition in i to select only the non-NA elements of 'C', and assign (:=) the rank values to create the 'RANK' column. Rows that are not selected, i.e. those where 'C' is NA, are left as NA in the new column by default.

library(data.table)

# Convert df to a data.table by reference.
setDT(df)
# Rank C in descending order within each level of B, using only the rows
# where C is not missing; rows excluded by the filter are left as NA in RANK.
df[!is.na(C), RANK := rank(-C), by = B]
df
# A B C RANK
# 1: A V1 1 4.0
# 2: A V2 2 3.5
# 3: A V3 3 3.0
# 4: B V1 5 1.0
# 5: B V2 2 3.5
# 6: B V3 NA NA
# 7: C V1 4 2.0
# 8: C V2 6 2.0
# 9: C V3 7 2.0
#10: D V1 3 3.0
#11: D V2 7 1.0
#12: D V3 8 1.0

Rank within group in for loop in R

Not sure if this is exactly what you wanted but with the data.frame below this worked for me. Hope it helps

# Example data: two categories of ten rows each and two score columns with
# runs of missing values. No seed is set, so the scores differ between runs.
df <- data.frame(
  Category = rep(c("Orange", "Banana"), each = 10),
  Score.08.2007 = c(runif(6), rep(NA, 4), runif(4), rep(NA, 2), runif(4)),
  Score.09.2017 = c(
    runif(5), rep(NA, 3), runif(2), runif(4), rep(NA, 4), runif(2)
  ),
  # Spelled out as FALSE: F is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)

dplyr solution

library(dplyr)

# Append within-category rank columns to a data frame.
#
# theseCols: the columns to rank, given as a tidyselect expression or as a
#            character vector of column names.
# newCols:   character vector of names for the rank columns, one per selected
#            column and in the same order.
# df:        data frame with a Category column and the columns to rank.
#
# Returns df with the rank columns bound on the right. Ranks are computed
# separately for each Category; missing values stay NA (na.last = "keep").
eq_ranks <- function(theseCols, newCols, df) {
  ranks <- df %>%
    group_by(Category) %>%
    # across() with a lambda replaces mutate_at() + funs(); funs() is
    # deprecated in dplyr.
    mutate(across({{ theseCols }}, ~ rank(.x, na.last = "keep"))) %>%
    ungroup() %>%
    # Keep only the ranked columns so that newCols lines up with them even
    # when df holds other columns besides Category.
    select({{ theseCols }}) %>%
    setNames(newCols)
  cbind(df, ranks)
}

# Every column except the first one (Category) is ranked.
aux <- names(df)[-1]
# Name each rank column after its source column, with "Score" replaced by
# "Rank".
newCols <- sub("Score", "Rank", aux)
eq_ranks(aux, newCols, df)

Output

structure(list(Category = c("Orange", "Orange", "Orange", "Orange", 
"Orange", "Orange", "Orange", "Orange", "Orange", "Orange", "Banana",
"Banana", "Banana", "Banana", "Banana", "Banana", "Banana", "Banana",
"Banana", "Banana"), Score.08.2007 = c(0.757087148027495, 0.202692255144939,

0.711121222469956, 0.121691921027377, 0.245488513959572, 0.14330437942408,
NA, NA, NA, NA, 0.239629415096715, 0.0589343772735447, 0.642288258532062,
0.876269212691113, NA, NA, 0.778914677444845, 0.79730882588774,
0.455274453619495, 0.410084082046524), Score.09.2017 = c(0.810870242770761,
0.604933290276676, 0.654723928077146, 0.353197271935642, 0.270260145887733,
NA, NA, NA, 0.99268406117335, 0.633493264438584, 0.213208135217428,
0.129372348077595, 0.478118034312502, 0.924074469832703, NA,
NA, NA, NA, 0.59876096714288, 0.976170694921166), Rank.08.2007 = c(6,
3, 5, 1, 4, 2, NA, NA, NA, NA, 2, 1, 5, 8, NA, NA, 6, 7, 4, 3
), Rank.09.2017 = c(6, 3, 5, 2, 1, NA, NA, NA, 7, 4, 2, 1, 3,
5, NA, NA, NA, NA, 4, 6)), .Names = c("Category", "Score.08.2007",
"Score.09.2017", "Rank.08.2007", "Rank.09.2017"), row.names = c(NA,
-20L), class = "data.frame")

Get ranking within each group for r dataframe

Looks like people forgot about your question. Hope this doesn't come too late ^^

library(dplyr)

# Rank samples within each gene from the highest expression to the lowest;
# dense_rank() gives tied values the same rank and leaves no gaps.
df %>%
  group_by(Gene) %>%
  mutate(Rank = dense_rank(desc(Expression)))

> df
# A tibble: 7 x 4
# Groups: Gene [2]
Gene Expression Sample Rank
<chr> <dbl> <chr> <dbl>
1 ENSG00000000003 2.82 HSB671 1
2 ENSG00000000003 2.79 HSB431 2
3 ENSG00000000003 2.40 HSB618 3
4 ENSG00000000938 1.75 HSB671 1
5 ENSG00000000938 1.52 HSB670 2
6 ENSG00000000938 0.835 HSB414 3
7 ENSG00000000938 0.622 HSB459 4

Or with base R:

# ave() applies rank() to the negated expression values separately for each
# gene, so the highest expression in a gene gets rank 1.
df$Rank <- with(df, ave(-Expression, Gene, FUN = rank))



Related Topics



Leave a reply



Submit