Create a Ranking Variable with Dplyr

Create a ranking variable with dplyr?

It sounds like you're looking for dense_rank from "dplyr" -- but applied in the reverse (descending) order of what rank normally does.

Try this:

# Dense ranking on descending score: tied scores share a rank and the next
# distinct score receives the next consecutive rank (no gaps).
mutate(df, rank = dense_rank(desc(score)))
#   name score rank
# 1    A    10    1
# 2    B    10    1
# 3    C     9    2
# 4    D     8    3

Rank variable by group (dplyr)

The following produces the desired result as was specified.

library(dplyr)

# Sort by species and sepal length, group by species, then number the rows
# inside each group. ties.method = "first" breaks ties by position, so every
# row in a group gets a distinct rank. The result is left grouped by Species.
by_species <- mutate(
  group_by(arrange(iris, Species, Sepal.Length), Species),
  rank = rank(Sepal.Length, ties.method = "first")
)

# Keep the three lowest-ranked rows of every species.
filter(by_species, rank <= 3)
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
##  Sepal.Length Sepal.Width Petal.Length Petal.Width    Species  rank
##         (dbl)       (dbl)        (dbl)       (dbl)     (fctr) (int)
##1          4.3         3.0          1.1         0.1     setosa     1
##2          4.4         2.9          1.4         0.2     setosa     2
##3          4.4         3.0          1.3         0.2     setosa     3
##4          4.9         2.4          3.3         1.0 versicolor     1
##5          5.0         2.0          3.5         1.0 versicolor     2
##6          5.0         2.3          3.3         1.0 versicolor     3
##7          4.9         2.5          4.5         1.7  virginica     1
##8          5.6         2.8          4.9         2.0  virginica     2
##9          5.7         2.5          5.0         2.0  virginica     3

# by_species is grouped, so slice() takes the first three rows of each species.
slice(by_species, seq_len(3))
##Source: local data frame [9 x 6]
##Groups: Species [3]
##
##  Sepal.Length Sepal.Width Petal.Length Petal.Width    Species  rank
##         (dbl)       (dbl)        (dbl)       (dbl)     (fctr) (int)
##1          4.3         3.0          1.1         0.1     setosa     1
##2          4.4         2.9          1.4         0.2     setosa     2
##3          4.4         3.0          1.3         0.2     setosa     3
##4          4.9         2.4          3.3         1.0 versicolor     1
##5          5.0         2.0          3.5         1.0 versicolor     2
##6          5.0         2.3          3.3         1.0 versicolor     3
##7          4.9         2.5          4.5         1.7  virginica     1
##8          5.6         2.8          4.9         2.0  virginica     2
##9          5.7         2.5          5.0         2.0  virginica     3

R: how to create a ranking variable for each subject excluding NA values

Here is one option. After grouping by 'Subject', replace the non-NA elements of 'FixationDuration' with their row_number() computed over the 'Time' values of those same non-NA rows, so that rows with a missing 'FixationDuration' stay NA

library(dplyr)
# Within each subject, number the rows that have a FixationDuration by their
# Time order; rows with a missing FixationDuration keep NA.
# replace() fills a copy of the double column FixationDuration, so the ranks
# are stored as doubles.
mutate(
  group_by(df1, Subject),
  OrdinalFixationNumber = replace(
    FixationDuration,
    !is.na(FixationDuration),
    row_number(Time[!is.na(FixationDuration)])
  )
)
# A tibble: 13 x 4
# Groups:   Subject [3]
#   Subject FixationDuration  Time OrdinalFixationNumber
#     <int>            <dbl> <dbl>                 <dbl>
# 1       1            NA     1                       NA
# 2       1             0.33  2                        1
# 3       1            NA     3                       NA
# 4       1             0.15  4.22                     2
# 5       1             3.2   5.93                     3
# 6       2             6.88  1                        1
# 7       2             9.23  3                        2
# 8       2             0.77  3.01                     3
# 9       2             1.88  4.91                     4
#10      15             6.22  1                        1
#11      15            NA     1.56                    NA
#12      15            NA     1.76                    NA
#13      15             0.24  2.39                     2

In data.table, this can be done with

library(data.table)
# Rank the non-missing fixations of each subject by Time.
# setDT() converts df1 to a data.table by reference and := adds the column by
# reference, so df1 itself is modified. Rows excluded by the `i` filter
# (missing FixationDuration) get NA in the new column.
# frank(..., ties.method = "first") returns the rank of each Time value.
# seq_len(.N)[order(Time)] is just order(Time), i.e. the sorting permutation,
# which equals the rank only when Time is already sorted within the subject.
setDT(df1)[!is.na(FixationDuration),
           OrdinalFixationNumber := frank(Time, ties.method = "first"),
           by = Subject]
df1
#    Subject FixationDuration Time OrdinalFixationNumber
# 1:       1               NA 1.00                    NA
# 2:       1             0.33 2.00                     1
# 3:       1               NA 3.00                    NA
# 4:       1             0.15 4.22                     2
# 5:       1             3.20 5.93                     3
# 6:       2             6.88 1.00                     1
# 7:       2             9.23 3.00                     2
# 8:       2             0.77 3.01                     3
# 9:       2             1.88 4.91                     4
#10:      15             6.22 1.00                     1
#11:      15               NA 1.56                    NA
#12:      15               NA 1.76                    NA
#13:      15             0.24 2.39                     2

data

# Example data: 13 observations for subjects 1, 2 and 15, with some missing
# fixation durations.
df1 <- data.frame(
  Subject = rep(c(1L, 2L, 15L), times = c(5L, 4L, 4L)),
  FixationDuration = c(
    NA, 0.33, NA, 0.15, 3.2,
    6.88, 9.23, 0.77, 1.88,
    6.22, NA, NA, 0.24
  ),
  Time = c(
    1, 2, 3, 4.22, 5.93,
    1, 3, 3.01, 4.91,
    1, 1.56, 1.76, 2.39
  )
)

Create descending ranks for a set of columns using dplyr

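`desc` has to be supplied as a function that is applied to each selected column. The original answer wrapped it in `funs()`; current dplyr deprecates `funs()` in favour of `across()` or passing the function directly.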

# Sort by every column from Murder through Rape, each in descending order.
# across() applies desc() to each selected column; it replaces the deprecated
# funs() helper and the superseded arrange_at().
out1 <- USArrests %>%
  tibble::rownames_to_column() %>%
  arrange(across(Murder:Rape, desc))

Checking with applying desc on each column

# Reference result: the same sort with desc() written out by hand for each of
# the four columns, used to confirm that out1 is ordered as intended.
out2 <- arrange(
  tibble::rownames_to_column(USArrests),
  desc(Murder),
  desc(Assault),
  desc(UrbanPop),
  desc(Rape)
)
identical(out1, out2)
#[1] TRUE

Based on the above, we can make changes in the rank_f

# Replace each of the Murder..Rape columns of the sorted data with its
# min_rank().
out3 <- mutate_at(out2, vars(Murder:Rape), min_rank)
# rank_f: sort a data frame by a set of columns in descending order, then
# apply the given function(s) to those same columns.
#
# Arguments:
#   ds   - the data frame to process.
#   cols - column selection, spliced with !!! into vars(); the call in this
#          file passes it as quos((Murder:Rape)).
#   fs   - function(s) to apply, spliced with !!! into funs(); the call in
#          this file passes it as quos(min_rank).
#
# Returns the result of the arrange_at() / mutate_at() pipeline.
#
# NOTE(review): funs() is deprecated and arrange_at()/mutate_at() are
# superseded in current dplyr -- consider migrating to across().
rank_f <- function(ds, cols, fs){
          ds %>%
               arrange_at(vars(!!!cols), funs(desc))%>%
               mutate_at(vars(!!!cols), funs(!!!fs))
        }
# Run the helper on the same input and column range, then confirm that it
# reproduces the step-by-step result stored in out3.
out4 <- rank_f(
  tibble::rownames_to_column(USArrests),
  quos((Murder:Rape)),
  quos(min_rank)
)

identical(out3, out4)
#[1] TRUE

Update

Based on the comments from OP, we don't need to do any arrange, we can directly apply min_rank by converting the column values to negative

# Descending ranks without any sorting: negating a column reverses its order,
# so min_rank() of the negated values gives rank 1 to the largest value.
# across() with a lambda replaces the deprecated funs() helper and the
# superseded mutate_at().
USArrests %>%
  tibble::rownames_to_column() %>%
  mutate(across(Murder:Rape, ~ min_rank(-.x)))

Get rank for every column using dplyr

We may use across - loop over the numeric column, get the rank and create new column names by adding a suffix in .names

library(dplyr)
# Add a "<column>_rank" column for every numeric column, keeping the
# originals. rank() is called with its defaults here.
out <- mutate(
  mtcars,
  across(where(is.numeric), rank, .names = "{.col}_rank")
)

-output

> head(out, 2)
              mpg cyl disp  hp drat    wt  qsec vs am gear carb mpg_rank cyl_rank disp_rank hp_rank drat_rank wt_rank qsec_rank vs_rank
Mazda RX4      21   6  160 110  3.9 2.620 16.46  0  1    4    4     19.5       15      13.5      13      21.5       9       6.0     9.5
Mazda RX4 Wag  21   6  160 110  3.9 2.875 17.02  0  1    4    4     19.5       15      13.5      13      21.5      12      10.5     9.5
              am_rank gear_rank carb_rank
Mazda RX4          26      21.5      25.5
Mazda RX4 Wag      26      21.5      25.5

By default, rank() resolves ties by giving each tied value the average of the ranks they span ("average" is the first, and therefore default, ties.method):

rank(x, na.last = TRUE,
ties.method = c("average", "first", "last", "random", "max", "min"))

So, it may be better to specify ties.method or may use dense_rank

# Same as before but with dense_rank(): tied values share one integer rank
# and the ranks have no gaps.
out <- mutate(
  mtcars,
  across(where(is.numeric), dense_rank, .names = "{.col}_rank")
)

-output

> head(out, 2)
              mpg cyl disp  hp drat    wt  qsec vs am gear carb mpg_rank cyl_rank disp_rank hp_rank drat_rank wt_rank qsec_rank vs_rank
Mazda RX4      21   6  160 110  3.9 2.620 16.46  0  1    4    4       16        2        13      11        16       9         6       1
Mazda RX4 Wag  21   6  160 110  3.9 2.875 17.02  0  1    4    4       16        2        13      11        16      12        10       1
              am_rank gear_rank carb_rank
Mazda RX4           2         2         4
Mazda RX4 Wag       2         2         4

Regarding the OP's function: it uses `df` as the input dataset, but `df` is not an argument of the function, and by default `df` resolves to a function that ships with R (`stats::df`). Also, writing `rank =` names every new column literally `rank` instead of deriving the name from the column being ranked. The function could be modified as

cols <- colnames(mtcars)

# get_rank: return a one-column data frame holding the rank of column `col`
# of `data`, named "<col>_rank".
#   data - data frame containing the column.
#   col  - column name as a string.
get_rank <- function(data, col) {
  rank_name <- stringr::str_c(col, "_rank")
  transmute(data, !!rank_name := rank(.data[[col]]))
}

# Build one rank column per original column and bind them all to mtcars.
bind_cols(mtcars, lapply(cols, get_rank, data = mtcars))

How to rank numeric data by rows in a dataframe in r?

We may use pmap to loop over each of the rows (would be fast compared to rowwise) and apply dense_rank

library(purrr)
library(dplyr)
# For each row, collect its values into one named vector, dense-rank them in
# descending order (via negation) and restore the column names; pmap_dfr()
# row-binds the per-row results.
pmap_dfr(df, function(...) {
  row_values <- c(...)
  setNames(dense_rank(-row_values), names(row_values))
})

-output

# A tibble: 5 x 3
      a     b     c
  <int> <int> <int>
1     1     2    NA
2     1     2    NA
3     2     1    NA
4     1    NA    NA
5     1     2     1

Or a faster option may be using dapply from collapse

library(collapse)
library(data.table)
# Row-wise dense rank with collapse::dapply() and data.table::frank().
# The values are negated so the largest value in a row gets rank 1, matching
# the descending ranks of the pmap()/dense_rank(-x) version; without the
# negation frank() ranks ascending and the two answers disagree.
# na.last = "keep" leaves NA entries as NA.
dapply(df, MARGIN = 1,
       FUN = function(x) frank(-x, ties.method = "dense", na.last = "keep"))
  a  b  c
1 1  2 NA
2 1  2 NA
3 2  1 NA
4 1 NA NA
5 1  2  1

Apply a rank across groups

You could try

library(dplyr)

# Step 1: per group, carry the Value from the row with the largest YEAR
#         (which.max() picks the first such row) into every row as Rank.
# Step 2: after ungrouping, turn that value into a descending dense rank, so
#         all rows of a group share the group's rank.
data %>%
  group_by(Grp) %>%
  mutate(Rank = Value[which.max(YEAR)]) %>%
  ungroup() %>%
  mutate(across(Rank, ~ dense_rank(-.x)))

#   YEAR Grp Value Rank
# 1 2020   A    25    3
# 2 2019   A    24    3
# 3 2020   B    35    2
# 4 2019   B    34    2
# 5 2020   C    45    1
# 6 2019   C    44    1

Create a Ranking Variable with Dplyr