How to rank within groups in R?
You can do this pretty cleanly with dplyr
library(dplyr)
# Rank orders within each customer: highest order value first, ties broken by
# the most recent order date. order(order(x)) turns a sort permutation into
# the rank of each row.
df %>%
  group_by(customer_name) %>%
  mutate(
    my_ranks = order(order(order_values, order_dates, decreasing = TRUE))
  )
Source: local data frame [5 x 4]
Groups: customer_name
customer_name order_dates order_values my_ranks
1 John 2010-11-01 15 3
2 Bob 2008-03-25 12 1
3 Alex 2009-11-15 5 1
4 John 2012-08-06 15 2
5 John 2015-05-07 20 1
Ranking within each group
The dplyr way:
library(dplyr)
# Sort by group, then y, then z, and number the rows inside each group.
d %>%
  arrange(group, y, z) %>%
  group_by(group) %>%
  # row_number() instead of 1:n(): 1:n() evaluates to c(1, 0) when there are
  # zero rows, which makes mutate() fail on empty input.
  mutate(rank = row_number()) %>%
  ungroup()
We first sort the data.frame by group, then by y, and then by z; we then group it by group and assign the rank to each observation.
Result:
# A tibble: 12 × 5
x y z group rank
<int> <dbl> <dbl> <int> <int>
1 1 0.00000 6.988904 1 1
2 1 0.00000 329.283431 1 2
3 1 0.00000 353.287515 1 3
4 0 35.51413 0.000000 1 4
5 0 47.87604 0.000000 1 5
6 0 272.62365 0.000000 1 6
7 1 0.00000 212.491666 2 1
8 1 0.00000 257.076377 2 2
9 1 0.00000 326.760675 2 3
10 1 0.00000 889.022577 2 4
11 0 48.02147 0.000000 2 5
12 0 84.97861 0.000000 2 6
Apply a rank across groups
You could try
library(dplyr)
data %>%
  group_by(Grp) %>%
  # Give every row of a group the Value recorded in that group's latest YEAR.
  mutate(Rank = Value[which.max(YEAR)]) %>%
  ungroup() %>%
  # Rank the groups against each other: the largest latest Value gets rank 1.
  mutate(Rank = dense_rank(-Rank))
# YEAR Grp Value Rank
# 1 2020 A 25 3
# 2 2019 A 24 3
# 3 2020 B 35 2
# 4 2019 B 34 2
# 5 2020 C 45 1
# 6 2019 C 44 1
Rank variable by group (dplyr)
The following produces the desired result as was specified.
library(dplyr)
# Sort within species, then rank Sepal.Length per species; ties are broken by
# row order so the ranks are distinct integers.
by_species <- iris %>%
  arrange(Species, Sepal.Length) %>%
  group_by(Species) %>%
  mutate(rank = rank(Sepal.Length, ties.method = "first"))

# Keep the three smallest sepal lengths in each species.
filter(by_species, rank <= 3)
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species rank
## (dbl) (dbl) (dbl) (dbl) (fctr) (int)
##1 4.3 3.0 1.1 0.1 setosa 1
##2 4.4 2.9 1.4 0.2 setosa 2
##3 4.4 3.0 1.3 0.2 setosa 3
##4 4.9 2.4 3.3 1.0 versicolor 1
##5 5.0 2.0 3.5 1.0 versicolor 2
##6 5.0 2.3 3.3 1.0 versicolor 3
##7 4.9 2.5 4.5 1.7 virginica 1
##8 5.6 2.8 4.9 2.0 virginica 2
##9 5.7 2.5 5.0 2.0 virginica 3
# by_species is still grouped by Species and sorted by Sepal.Length, so taking
# the first three rows of each group by position gives the same rows.
slice(by_species, 1:3)
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species rank
## (dbl) (dbl) (dbl) (dbl) (fctr) (int)
##1 4.3 3.0 1.1 0.1 setosa 1
##2 4.4 2.9 1.4 0.2 setosa 2
##3 4.4 3.0 1.3 0.2 setosa 3
##4 4.9 2.4 3.3 1.0 versicolor 1
##5 5.0 2.0 3.5 1.0 versicolor 2
##6 5.0 2.3 3.3 1.0 versicolor 3
##7 4.9 2.5 4.5 1.7 virginica 1
##8 5.6 2.8 4.9 2.0 virginica 2
##9 5.7 2.5 5.0 2.0 virginica 3
R data frame rank by groups (group by rank) with package dplyr
I had a similar issue. My solution was to sort by group and by the relevant ranked variable(s), and then use row_number() after group_by().
# Sample dataset: 20 rows alternating between two groups, with integer values
# truncated from normal draws (mean 1000, sd 500).
df <- data.frame(
  group = rep(c("GROUP 1", "GROUP 2"), times = 10),
  value = as.integer(rnorm(n = 20, mean = 1000, sd = 500))
)
# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the script fail later.
library(dplyr)
# Show the first ten rows. head() states the intent directly instead of relying
# on the zero in 0:10 being silently dropped by the row index.
print.data.frame(head(df, 10))
group value
1 GROUP 1 1273
2 GROUP 2 1261
3 GROUP 1 1189
4 GROUP 2 1390
5 GROUP 1 1942
6 GROUP 2 1111
7 GROUP 1 530
8 GROUP 2 893
9 GROUP 1 997
10 GROUP 2 237
# Order rows by group with the largest value first, then number the rows
# within each group to obtain the rank.
sorted <- df %>%
  arrange(group, desc(value)) %>%
  group_by(group) %>%
  mutate(rank = row_number())

print.data.frame(sorted)
group value rank
1 GROUP 1 1942 1
2 GROUP 1 1368 2
3 GROUP 1 1273 3
4 GROUP 1 1249 4
5 GROUP 1 1189 5
6 GROUP 1 997 6
7 GROUP 1 562 7
8 GROUP 1 535 8
9 GROUP 1 530 9
10 GROUP 1 1 10
11 GROUP 2 1472 1
12 GROUP 2 1390 2
13 GROUP 2 1281 3
14 GROUP 2 1261 4
15 GROUP 2 1111 5
16 GROUP 2 893 6
17 GROUP 2 774 7
18 GROUP 2 669 8
19 GROUP 2 631 9
20 GROUP 2 237 10
Rank within groups in R with special NA handling
We can group by 'B' and rank on 'C'. Specify `i` with a logical condition that selects only the non-NA elements of 'C', and assign (`:=`) the rank values to create the 'RANK' column. By default, the rows that are not used (i.e. those where 'C' is NA) will be NA in the new column.
library(data.table)
# Convert df to a data.table by reference.
setDT(df)
# Rank C in descending order within each B, using only rows where C is not NA;
# rows excluded by the filter are left as NA in the new RANK column.
df[!is.na(C), RANK := rank(-C), by = B]
df
# A B C RANK
# 1: A V1 1 4.0
# 2: A V2 2 3.5
# 3: A V3 3 3.0
# 4: B V1 5 1.0
# 5: B V2 2 3.5
# 6: B V3 NA NA
# 7: C V1 4 2.0
# 8: C V2 6 2.0
# 9: C V3 7 2.0
#10: D V1 3 3.0
#11: D V2 7 1.0
#12: D V3 8 1.0
Rank within group in for loop in R
Not sure if this is exactly what you wanted but with the data.frame
below this worked for me. Hope it helps
# Example data: two categories of ten rows each, with NA gaps in both score
# columns.
df <- data.frame(
  Category = c(rep("Orange", 10), rep("Banana", 10)),
  Score.08.2007 = c(runif(6), rep(NA, 4), runif(4), rep(NA, 2), runif(4)),
  Score.09.2017 = c(runif(5), rep(NA, 3), runif(2), runif(4), rep(NA, 4),
                    runif(2)),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
dplyr solution
library(dplyr)

#' Append within-category ranks for a set of score columns
#'
#' @param theseCols Columns to rank, given as a tidyselect expression (for
#'   example `all_of(aux)` or `starts_with("Score")`).
#' @param newCols Character vector of names for the rank columns, one per
#'   selected column and in the same order as the selection.
#' @param df A data frame with a `Category` column plus the columns to rank.
#' @return `df` with the rank columns bound on the right. Ranks are computed
#'   within each `Category`; `NA` scores keep an `NA` rank.
eq_ranks <- function(theseCols, newCols, df) {
  ranked <- df %>%
    group_by(Category) %>%
    # across() replaces the superseded mutate_at() + deprecated funs() pair.
    mutate(across({{ theseCols }}, function(x) rank(x, na.last = "keep"))) %>%
    ungroup() %>%
    # Keep only the ranked columns, in selection order, so they line up with
    # newCols even when df carries other columns.
    select({{ theseCols }})

  stopifnot(length(newCols) == ncol(ranked))
  cbind(df, setNames(ranked, newCols))
}

aux <- colnames(df)[-1]
newCols <- sub("Score", "Rank", aux)
# all_of() marks aux as an external character vector of column names.
eq_ranks(all_of(aux), newCols, df)
Output
structure(list(Category = c("Orange", "Orange", "Orange", "Orange",
"Orange", "Orange", "Orange", "Orange", "Orange", "Orange", "Banana",
"Banana", "Banana", "Banana", "Banana", "Banana", "Banana", "Banana",
"Banana", "Banana"), Score.08.2007 = c(0.757087148027495, 0.202692255144939,
0.711121222469956, 0.121691921027377, 0.245488513959572, 0.14330437942408,
NA, NA, NA, NA, 0.239629415096715, 0.0589343772735447, 0.642288258532062,
0.876269212691113, NA, NA, 0.778914677444845, 0.79730882588774,
0.455274453619495, 0.410084082046524), Score.09.2017 = c(0.810870242770761,
0.604933290276676, 0.654723928077146, 0.353197271935642, 0.270260145887733,
NA, NA, NA, 0.99268406117335, 0.633493264438584, 0.213208135217428,
0.129372348077595, 0.478118034312502, 0.924074469832703, NA,
NA, NA, NA, 0.59876096714288, 0.976170694921166), Rank.08.2007 = c(6,
3, 5, 1, 4, 2, NA, NA, NA, NA, 2, 1, 5, 8, NA, NA, 6, 7, 4, 3
), Rank.09.2017 = c(6, 3, 5, 2, 1, NA, NA, NA, 7, 4, 2, 1, 3,
5, NA, NA, NA, NA, 4, 6)), .Names = c("Category", "Score.08.2007",
"Score.09.2017", "Rank.08.2007", "Rank.09.2017"), row.names = c(NA,
-20L), class = "data.frame")
Get ranking within each group for r dataframe
Looks like people forgot about your question. Hope this doesn't come too late ^^
library(dplyr)
# Assign the result back to df: without the assignment df is left unchanged
# and printing it afterwards would not show the Rank column.
# Within each Gene, the highest Expression gets rank 1; dense_rank() leaves no
# gaps after ties.
df <- df %>%
  group_by(Gene) %>%
  mutate(Rank = dense_rank(desc(Expression)))
> df
# A tibble: 7 x 4
# Groups: Gene [2]
Gene Expression Sample Rank
<chr> <dbl> <chr> <dbl>
1 ENSG00000000003 2.82 HSB671 1
2 ENSG00000000003 2.79 HSB431 2
3 ENSG00000000003 2.40 HSB618 3
4 ENSG00000000938 1.75 HSB671 1
5 ENSG00000000938 1.52 HSB670 2
6 ENSG00000000938 0.835 HSB414 3
7 ENSG00000000938 0.622 HSB459 4
Or with base R:
# Base R: rank the negated Expression within each Gene, so the largest
# Expression in a gene gets rank 1.
df$Rank <- with(df, ave(-Expression, Gene, FUN = rank))
Related Topics
R - How to Find Points Within Specific Contour
Install the Package That Has Been Removed from the Cran Repository Easily
Copy Upper Triangle to Lower Triangle for Several Matrices in a List
How to Do a Regression of a Series of Variables Without Typing Each Variable Name
About Gforce in Data.Table 1.9.2
Conditional Rolling Mean (Moving Average) on Irregular Time Series
Replace Missing Value with Previous Value
Coding Variable Values into Classes Using R
R - Store a Matrix into a Single Dataframe Cell
R-Shiny Using Reactive Renderui Value
Data.Table Alternative for Dplyr Case_When
Determining the Distance Between Two Zip Codes (Alternatives to Mapdist)
Add Density Lines to Histogram and Cumulative Histogram
Creating Professional Looking Powerpoints in R
How to Host a Shiny App on a Windows Machine
Jitter If Multiple Outliers in Ggplot2 Boxplot
How to Find Useful R Tutorials with Various Implementations
Given a 2D Numeric "Height Map" Matrix in R, How to Find All Local Maxima