R - Group by Variable and Then Assign a Unique Id

R - Group by variable and then assign a unique ID

dplyr has a group_indices function for creating unique group IDs

library(dplyr)

# Toy data: the first and last rows share a personal_id, so they must end up
# with the same group ID.
data <- data.frame(
  personal_id = c("111-111-111", "999-999-999", "222-222-222", "111-111-111"),
  gender = c("M", "F", "M", "M"),
  temperature = c(99.6, 98.2, 97.8, 95.5)
)

# Group first, then ask for the indices: passing grouping columns straight to
# group_indices() is deprecated as of dplyr 1.0.0.
data$group_id <- data %>%
  group_by(personal_id) %>%
  group_indices()

# Drop the original identifier now that group_id stands in for it.
data <- data %>% select(-personal_id)

data
  gender temperature group_id
1      M        99.6        1
2      F        98.2        3
3      M        97.8        2
4      M        95.5        1

Or within the same pipeline (https://github.com/tidyverse/dplyr/issues/2160):

# Same IDs inside a single pipeline. Group first and let cur_group_id() report
# the ID of the group being processed (dplyr >= 1.0.0); handing columns to
# group_indices() is deprecated. Note the result is returned as a tibble.
data %>%
  group_by(personal_id) %>%
  mutate(group_id = cur_group_id()) %>%
  ungroup()

Assign unique id to consecutive rows within a grouping variable in dplyr

We can use gl

library(dplyr)

# Within each `group`, number the rows in consecutive pairs: 1, 1, 2, 2, ...
df <- df %>%
  group_by(group) %>%
  mutate(
    # gl() builds a factor whose levels each repeat twice, truncated to the
    # group size; the integer codes of that factor are the pair IDs.
    id = as.integer(gl(n = n(), k = 2, length = n()))
  ) %>%
  ungroup()

Group dataframe rows by creating a unique ID column based on the amount of time passed between entries and variable values

Here's a dplyr approach that calculates the gap and rolling avg gap within each Name/Item group, then flags large gaps, and assigns a new group for each large gap or change in Name or Item.

# NOTE(review): assumes `Date` is a Date column, so gaps are measured in days
# -- confirm against the data.
df1 %>%
  group_by(Name, Item) %>%
  mutate(
    purch_num = row_number(),
    time_since_first = Date - first(Date),
    # The first purchase in a Name/Item group has no predecessor; an
    # infinitely early default makes its gap infinite, so it always opens a
    # new group. `origin` is spelled out because as.Date() on a number
    # requires it before R 4.3.0.
    gap = Date - lag(Date, default = as.Date(-Inf, origin = "1970-01-01")),
    # Running average gap; 0 / 0 (NaN) on the first purchase of a group.
    avg_gap = time_since_first / (purch_num - 1),
    new_grp_flag = gap > 180 | gap > 3 * avg_gap
  ) %>%
  ungroup() %>%
  # Each flagged row starts a new group; the counter runs across the whole
  # table, so a change in Name or Item also yields a fresh group number.
  mutate(group = cumsum(new_grp_flag))

How to assign a unique ID number to each group of identical values in a column

How about

# factor() maps every distinct value of `sample` to a level; the level codes,
# converted to double by as.numeric(), become the ID.
df2 <- transform(df, id = as.numeric(factor(sample)))

I think this (cribbed from Add ID column by group) should be slightly more efficient, although perhaps a little harder to remember:

# match() against the unique values numbers each value of `sample` by the
# position of its first appearance.
df3 <- transform(df, id = match(sample, unique(sample)))
all.equal(df2, df3) ## TRUE

If you want to do this in tidyverse:

library(dplyr)

# cur_group_id() returns the ID of the group currently being processed.
# ungroup() afterwards so later verbs are not silently applied per `sample`.
df %>%
  group_by(sample) %>%
  mutate(id = cur_group_id()) %>%
  ungroup()

group rows (ID) and then assign a treatment for each group ID

You can use base R to do this with a merge:

set.seed(1)

# Lookup table: one row per distinct ID, each with a randomly drawn treatment.
# `ID` is a column of `data`, so it is referenced through the data frame
# rather than as a free-standing variable.
random_trt <- data.frame(
  ID = unique(data$ID),
  New_Treatment = sample(
    c("a", "b", "c"),
    size = length(unique(data$ID)),
    replace = TRUE
  )
)

# One-to-many merge: the single treatment drawn for an ID is repeated on every
# row of `data` carrying that ID; all.x keeps every row of `data`.
merge(
  data,
  random_trt,
  by = "ID",
  all.x = TRUE
)

   ID Treatment New_Treatment
1   1         a             a
2   1         a             a
3   1         a             a
4   2         b             c
5   2         b             c
6   2         b             c
7   2         b             c
8   2         b             c
9   3         c             a
10  3         c             a
11  4         a             b
12  4         a             b
13  4         a             b
14  4         a             b
15  5         b             a
16  6         c             c
17  6         c             c
18  6         c             c
19  7         a             c
20  7         a             c

You use sample to randomly sample your treatment vector for each unique ID. Then you merge that as a one-to-many merge so that it repeats for each ID in data.

Using dplyr:

set.seed(1)

# sample() runs once per ID group and the single draw is recycled across that
# group's rows, so every row of an ID receives the same treatment.
# ungroup() afterwards so the result does not stay grouped by ID.
data %>%
  dplyr::group_by(ID) %>%
  dplyr::mutate(New_Treatment = sample(c("a", "b", "c"), size = 1)) %>%
  dplyr::ungroup()

      ID Treatment New_Treatment
   <dbl> <chr>     <chr>        
 1     1 a         a            
 2     1 a         a            
 3     1 a         a            
 4     2 b         c            
 5     2 b         c            
 6     2 b         c            
 7     2 b         c            
 8     2 b         c            
 9     3 c         a            
10     3 c         a            
11     4 a         b            
12     4 a         b            
13     4 a         b            
14     4 a         b            
15     5 b         a            
16     6 c         c            
17     6 c         c            
18     6 c         c            
19     7 a         c            
20     7 a         c

generate id within group

You can use data.table::rleid(), i.e.

library(dplyr)

# Within each VarA group, rleid() starts at 1 and increments every time VarB
# differs from the previous row.
df %>%
  group_by(VarA) %>%
  mutate(id = data.table::rleid(VarB))

# A tibble: 6 x 3
# Groups:   VarA [2]
#  VarA  VarB     id
#  <chr> <chr> <int>
#1 A     aaaa      1
#2 A     aaaa      1
#3 B     bbbb      1
#4 B     bbbb      1
#5 B     bbbb      1
#6 B     cccc      2

Assign unique ID to distinct values within Group with dplyr

Using data.table and sprintf:

library(data.table)

# Per employee: number each run of identical colours, number the rows inside
# each run, and format the pieces into a single ID string.
setDT(dat)[, ID := {
  colour_run <- rleid(Color)
  sprintf("%s.%02d.%03d", Emp, colour_run, rowid(colour_run))
}, by = Emp]

you get:

> dat
   Emp  Color       ID
1:   A    Red A.01.001
2:   A  Green A.02.001
3:   A  Green A.02.002
4:   B Orange B.01.001
5:   B Yellow B.02.001
6:   C  Brown C.01.001

How this works:

You convert dat to a data.table with setDT()
Group by Emp.
And create the ID-variable with the sprintf-function. With sprintf you can easily paste several vectors together according to a specified format.
The use of := means that the data.table is updated by reference.
%s indicates that a string is to be used in the first part (which is Emp). %02d and %03d indicate that a number needs to have two or three digits, padded with leading zero(s) when needed. The dots in between will be taken literally and are thus included in the resulting string.

Addressing the comment of @jsta, if the values in the Color-column are not sequential you can use:

# Number the colours by order of first appearance within each employee, build
# the ID from that number, then drop the helper column again.
setDT(dat)
dat[, r := as.integer(factor(Color, levels = unique(Color))), by = Emp]
dat[, ID := sprintf("%s.%02d.%03d", Emp, r, rowid(r)), by = Emp]
dat[, r := NULL]

This will also maintain the order in which the Color column is presented. Instead of as.integer(factor(Color, levels = unique(Color))) you can also use match(Color, unique(Color)) as shown by akrun.

Implementing the above on a bit larger dataset to illustrate:

# Stack the table on top of itself so every Emp/Color combination occurs in
# two separate places, then rebuild the IDs; the helper column `r` is kept.
dat2 <- rbindlist(list(dat, dat))
dat2[, r := match(Color, unique(Color)), by = Emp]
dat2[, ID := sprintf("%s.%02d.%03d", Emp, r, rowid(r)), by = Emp]

gets you:

> dat2
    Emp  Color r       ID
 1:   A    Red 1 A.01.001
 2:   A  Green 2 A.02.001
 3:   A  Green 2 A.02.002
 4:   B Orange 1 B.01.001
 5:   B Yellow 2 B.02.001
 6:   C  Brown 1 C.01.001
 7:   A    Red 1 A.01.002
 8:   A  Green 2 A.02.003
 9:   A  Green 2 A.02.004
10:   B Orange 1 B.01.002
11:   B Yellow 2 B.02.002
12:   C  Brown 1 C.01.002

assign a unique ID number for every repeated value in a column R

It can be done using rowid

library(data.table)
library(dplyr)

# rowid() counts the occurrences of each Name in row order, giving
# 1, 2, 3, ... within every Name.
weighted_df %>%
  mutate(ID = rowid(Name))

-output

#     Name        Room1       Room2        Room3 ID
#1    H001  0.579649851  0.84602529  0.620850211  1
#2    H001  0.579649851  0.84602529  0.620850211  2
#3    H001  0.579649851  0.84602529  0.620850211  3
#4    H001  0.579649851  0.84602529  0.620850211  4
#5    H001  0.579649851  0.84602529  0.620850211  5
#6    H001  0.579649851  0.84602529  0.620850211  6
#7    H001  0.579649851  0.84602529  0.620850211  7
#8    H001  0.579649851  0.84602529  0.620850211  8
#9    H001  0.579649851  0.84602529  0.620850211  9
#10   H001  0.579649851  0.84602529  0.620850211 10
#11   H001  0.579649851  0.84602529  0.620850211 11
#12   H001  0.579649851  0.84602529  0.620850211 12
#13   H001  0.579649851  0.84602529  0.620850211 13
#14   H001  0.579649851  0.84602529  0.620850211 14
#15   H001  0.579649851  0.84602529  0.620850211 15
#16   H001  0.579649851  0.84602529  0.620850211 16
#17   H001  0.579649851  0.84602529  0.620850211 17
#18   H002  1.457267473 -1.18612874  0.553957293  1
#19   H002  1.457267473 -1.18612874  0.553957293  2
# ...

R: Create numbering within each group

A) The dplyr package offers group_indices() for adding unique group identifiers:

library(dplyr)

# Group by ID first, then ask for the indices: passing the grouping column
# directly to group_indices() is deprecated as of dplyr 1.0.0.
df$number <- df %>%
  group_by(ID) %>%
  group_indices()
df

# A tibble: 10 × 3
   study    ID number
   <chr> <dbl>  <int>
 1 A         1      1
 2 B         1      1
 3 C         1      1
 4 A         5      2
 5 B         5      2
...

B) You can drop observations where the group size is less than 3 (i.e., "A", "B" and "C") with filter():

# Keep only the IDs that have exactly three rows; n() is the size of the
# current group. The result stays grouped by ID.
df %>%
  group_by(ID) %>%
  filter(n() == 3L)

# A tibble: 6 × 3
# Groups:   ID [2]
  study    ID number
  <chr> <dbl>  <int>
1 A         1      1
2 B         1      1
3 C         1      1
4 A         7      3
5 B         7      3
6 C         7      3

R: add a dplyr group label as a number

I think in this case something as simple as :

# `name` is a factor, so its integer codes (the level positions) can serve
# directly as the group number.
df %>%
  mutate(group_no = as.integer(name))

will work

# A tibble: 20 x 4
# Groups:   id [2]
   id    name     val group_no
   <fct> <fct>  <dbl>    <int>
 1 a     N1    0.647         1
 2 a     N1    0.530         1
 3 a     N1    0.245         1
 4 a     N2    0.693         2
 5 a     N2    0.478         2
 6 a     N2    0.861         2
 7 a     N3    0.821         3
 8 a     N3    0.0995        3
 9 a     N3    0.662         3
10 b     N1    0.553         1
11 b     N1    0.0233        1
12 b     N1    0.519         1
13 b     N2    0.783         2
14 b     N2    0.789         2
15 b     N2    0.477         2
16 b     N2    0.438         2
17 b     N2    0.407         2
18 b     N3    0.732         3
19 b     N3    0.0707        3
20 b     N3    0.316         3

R - Group by Variable and Then Assign a Unique Id