Create a ranking variable with dplyr?
It sounds like you're looking for dense_rank
from "dplyr" -- but applied in the reverse of the order that rank
normally uses.
Try this:
# Rank scores from highest to lowest; tied scores share a rank and no rank
# numbers are skipped after a tie.
mutate(df, rank = dense_rank(desc(score)))
# name score rank
# 1 A 10 1
# 2 B 10 1
# 3 C 9 2
# 4 D 8 3
Rank variable by group (dplyr)
The following produces the desired result as was specified.
library(dplyr)
# Sort by species and sepal length, then number the rows within each species.
# ties.method = "first" gives tied lengths distinct ranks in row order.
by_species <- iris %>%
  arrange(Species, Sepal.Length) %>%
  group_by(Species) %>%
  mutate(rank = rank(Sepal.Length, ties.method = "first"))
# Keep the three lowest-ranked rows of each species.
filter(by_species, rank <= 3)
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species rank
## (dbl) (dbl) (dbl) (dbl) (fctr) (int)
##1 4.3 3.0 1.1 0.1 setosa 1
##2 4.4 2.9 1.4 0.2 setosa 2
##3 4.4 3.0 1.3 0.2 setosa 3
##4 4.9 2.4 3.3 1.0 versicolor 1
##5 5.0 2.0 3.5 1.0 versicolor 2
##6 5.0 2.3 3.3 1.0 versicolor 3
##7 4.9 2.5 4.5 1.7 virginica 1
##8 5.6 2.8 4.9 2.0 virginica 2
##9 5.7 2.5 5.0 2.0 virginica 3
# by_species is still grouped by Species, so this takes the first three rows
# of every group.
slice(by_species, 1:3)
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species rank
## (dbl) (dbl) (dbl) (dbl) (fctr) (int)
##1 4.3 3.0 1.1 0.1 setosa 1
##2 4.4 2.9 1.4 0.2 setosa 2
##3 4.4 3.0 1.3 0.2 setosa 3
##4 4.9 2.4 3.3 1.0 versicolor 1
##5 5.0 2.0 3.5 1.0 versicolor 2
##6 5.0 2.3 3.3 1.0 versicolor 3
##7 4.9 2.5 4.5 1.7 virginica 1
##8 5.6 2.8 4.9 2.0 virginica 2
##9 5.7 2.5 5.0 2.0 virginica 3
R: how to create a ranking variable for each subject excluding NA values
Here is one option. After grouping by 'Subject', replace
the non-NA elements in 'FixationDuration' with the row_number
of the 'Time' values taken from the rows where 'FixationDuration' is non-NA
library(dplyr)
# Within each Subject, number the non-missing fixations in order of Time;
# rows whose FixationDuration is NA keep NA.
df1 %>%
group_by(Subject) %>%
# replace() overwrites only the non-NA positions, so the replacement values
# are computed from the Time values of exactly those rows.
mutate(OrdinalFixationNumber = replace(FixationDuration,
!is.na(FixationDuration), row_number(Time[!is.na(FixationDuration)])))
# A tibble: 13 x 4
# Groups: Subject [3]
# Subject FixationDuration Time OrdinalFixationNumber
# <int> <dbl> <dbl> <dbl>
# 1 1 NA 1 NA
# 2 1 0.33 2 1
# 3 1 NA 3 NA
# 4 1 0.15 4.22 2
# 5 1 3.2 5.93 3
# 6 2 6.88 1 1
# 7 2 9.23 3 2
# 8 2 0.77 3.01 3
# 9 2 1.88 4.91 4
#10 15 6.22 1 1
#11 15 NA 1.56 NA
#12 15 NA 1.76 NA
#13 15 0.24 2.39 2
In data.table, this can be done with
library(data.table)
# Number the non-missing fixations of each Subject in order of Time; rows with
# a missing FixationDuration are not assigned and therefore stay NA.
# frank() returns each row's rank. order() would return the sorting
# permutation instead, which equals the rank only when Time is already sorted
# within the group.
setDT(df1)[!is.na(FixationDuration),
           OrdinalFixationNumber := frank(Time, ties.method = "first"),
           by = Subject]
df1
# Subject FixationDuration Time OrdinalFixationNumber
# 1: 1 NA 1.00 NA
# 2: 1 0.33 2.00 1
# 3: 1 NA 3.00 NA
# 4: 1 0.15 4.22 2
# 5: 1 3.20 5.93 3
# 6: 2 6.88 1.00 1
# 7: 2 9.23 3.00 2
# 8: 2 0.77 3.01 3
# 9: 2 1.88 4.91 4
#10: 15 6.22 1.00 1
#11: 15 NA 1.56 NA
#12: 15 NA 1.76 NA
#13: 15 0.24 2.39 2
data
# Example data: one row per fixation record; missing durations are NA.
df1 <- data.frame(
  Subject = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 15L, 15L, 15L, 15L),
  FixationDuration = c(
    NA, 0.33, NA, 0.15, 3.2, 6.88, 9.23, 0.77, 1.88, 6.22, NA, NA, 0.24
  ),
  Time = c(1, 2, 3, 4.22, 5.93, 1, 3, 3.01, 4.91, 1, 1.56, 1.76, 2.39)
)
Create descending ranks for a set of columns using dplyr
We need to wrap desc inside funs()
# Sort by the columns Murder through Rape, each in descending order, by
# applying desc to every selected column via funs().
# NOTE(review): funs() is deprecated in recent dplyr releases -- confirm the
# installed version still accepts it.
out1 <- USArrests %>%
tibble::rownames_to_column() %>%
arrange_at(vars(Murder:Rape), funs(desc))
Checking by applying desc
to each column explicitly
# Sort by all four measures in descending order, naming each column explicitly.
out2 <- arrange(
  tibble::rownames_to_column(USArrests),
  desc(Murder), desc(Assault), desc(UrbanPop), desc(Rape)
)
identical(out1, out2)
#[1] TRUE
Based on the above, we can change rank_f accordingly
# Replace each measure in the sorted table with its min_rank.
out3 <- mutate_at(out2, vars(Murder:Rape), min_rank)
#' Sort a data frame in descending order of the selected columns, then apply
#' functions to those same columns.
#'
#' @param ds A data frame.
#' @param cols Quosures selecting the columns, e.g. `quos(Murder:Rape)`;
#'   spliced into `vars()`.
#' @param fs Quosures naming the functions to apply, e.g. `quos(min_rank)`;
#'   spliced into `funs()`.
#' @return `ds` sorted by `cols` (descending) with `cols` transformed by `fs`.
rank_f <- function(ds, cols, fs) {
  selected <- vars(!!!cols)
  sorted <- arrange_at(ds, selected, funs(desc))
  mutate_at(sorted, selected, funs(!!!fs))
}
# Run rank_f on USArrests (row names moved into a column) for the columns
# Murder through Rape, using min_rank.
out4 <- rank_f(
  tibble::rownames_to_column(USArrests),
  quos((Murder:Rape)),
  quos(min_rank)
)
identical(out3, out4)
#[1] TRUE
Update
Based on the comments from OP, we don't need to do any arrange;
we can directly apply min_rank
after converting the column values to negative
# Rank each measure in descending order (largest value gets rank 1) by
# ranking the negated values. across() with a lambda replaces the deprecated
# funs() wrapper and overwrites the selected columns in place.
USArrests %>%
  tibble::rownames_to_column() %>%
  mutate(across(Murder:Rape, ~ min_rank(-.x)))
Get rank for every column using dplyr
We may use across
to loop over the numeric columns, get the rank,
and create new column names by adding a suffix in .names
library(dplyr)
# Append a "<column>_rank" column holding rank() of every numeric column.
out <- mutate(
  mtcars,
  across(where(is.numeric), rank, .names = "{.col}_rank")
)
-output
> head(out, 2)
mpg cyl disp hp drat wt qsec vs am gear carb mpg_rank cyl_rank disp_rank hp_rank drat_rank wt_rank qsec_rank vs_rank
Mazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4 19.5 15 13.5 13 21.5 9 6.0 9.5
Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4 19.5 15 13.5 13 21.5 12 10.5 9.5
am_rank gear_rank carb_rank
Mazda RX4 26 21.5 25.5
Mazda RX4 Wag 26 21.5 25.5
By default, if there are ties, rank
assigns them the average of their ranks
rank(x, na.last = TRUE,
ties.method = c("average", "first", "last", "random", "max", "min"))
So, it may be better to specify ties.method
or may use dense_rank
# Append a "<column>_rank" column holding dense_rank() of every numeric
# column, so tied values share one integer rank.
out <- mutate(
  mtcars,
  across(where(is.numeric), dense_rank, .names = "{.col}_rank")
)
-output
> head(out, 2)
mpg cyl disp hp drat wt qsec vs am gear carb mpg_rank cyl_rank disp_rank hp_rank drat_rank wt_rank qsec_rank vs_rank
Mazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4 16 2 13 11 16 9 6 1
Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4 16 2 13 11 16 12 10 1
am_rank gear_rank carb_rank
Mazda RX4 2 2 4
Mazda RX4 Wag 2 2 4
Regarding the OP's function, it uses df
as the input dataset, but df is not an argument of the function, and by default df
is a function in R (stats::df).
Also, writing rank=
names every new column literally rank instead of using the column name.
The function could be modified as
# Columns to rank: every column of mtcars.
cols <- colnames(mtcars)

#' Rank one column of a data frame.
#'
#' @param data A data frame.
#' @param col Name of the column to rank, as a string.
#' @return The result of `transmute()` holding a new column named
#'   `<col>_rank` with `rank()` of that column.
get_rank <- function(data, col) {
  rank_name <- stringr::str_c(col, "_rank")
  transmute(data, !!rank_name := rank(.data[[col]]))
}

# Build one rank column per input column and bind them all onto mtcars.
bind_cols(mtcars, lapply(cols, get_rank, data = mtcars))
How to rank numeric data by rows in a dataframe in r?
We may use pmap
to loop over each of the rows (which should be faster than rowwise)
and apply dense_rank
library(purrr)
library(dplyr)
# For each row, rank its values from largest to smallest (dense ranks),
# keeping the column names so the per-row results bind back into one table.
pmap_dfr(df, function(...) {
  row_values <- c(...)
  setNames(dense_rank(-row_values), names(row_values))
})
-output
# A tibble: 5 x 3
a b c
<int> <int> <int>
1 1 2 NA
2 1 2 NA
3 2 1 NA
4 1 NA NA
5 1 2 1
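Or a faster option may be using dapply
from collapse (note that frank is applied here without negating the values, so the ranks below are in ascending order, i.e. the reverse of the pmap result)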
library(collapse)
library(data.table)
# Apply frank to each row (MARGIN = 1) with dense ties; na.last = "keep"
# leaves NA entries as NA in the result.
dapply(df, MARGIN = 1, FUN = frank, ties.method = 'dense', na.last = "keep")
a b c
1 2 1 NA
2 2 1 NA
3 1 2 NA
4 1 NA NA
5 2 1 2
Apply a rank across groups
You could try
library(dplyr)
# Rank the groups by the Value recorded in their latest YEAR.
data %>%
group_by(Grp) %>%
# Within each group, take the Value from the row with the largest YEAR.
mutate(Rank = Value[which.max(YEAR)]) %>%
# Drop the grouping first so the ranking compares all groups together.
ungroup() %>%
# Negate so the largest value receives rank 1.
mutate(Rank = dense_rank(-Rank))
# YEAR Grp Value Rank
# 1 2020 A 25 3
# 2 2019 A 24 3
# 3 2020 B 35 2
# 4 2019 B 34 2
# 5 2020 C 45 1
# 6 2019 C 44 1
Related Topics
Change the Position of the Strip Label in Ggplot from the Top to the Bottom
Why Has Data.Table Defined := Rather Than Overloading <-
Filling in Missing (Blanks) in a Data Table, Per Category - Backwards and Forwards
Error: Vector Memory Exhausted (Limit Reached) R 3.5.0 MACos
R: Merge Two Irregular Time Series
Raw Text Strings for File Paths in R
Removing Specific Rows from a Dataframe
Export Data Frames to Excel via Xlsx with Conditional Formatting
R Command Line Passing a Filename to Script in Arguments (Windows)
Non-Numeric Argument to Binary Operator Error in R
What Does the @ Symbol Mean in R
The Simplest Way to Convert a List with Various Length Vectors to a Data.Frame in R
Adding New Column with Conditional Values Using Ifelse
Leaflet Legend for Custom Markers in R
Plotting Multiple Time Series on the Same Plot Using Ggplot()