R - Add Column That Counts Sequentially Within Groups But Repeats for Duplicates

R - add column that counts sequentially within groups but repeats for duplicates

dense_rank() is the function you are looking for:

library(dplyr)
# Rank `exhibit` within each `section`. dense_rank() gives tied (duplicate)
# values the same rank and leaves no gaps, so the counter repeats for
# duplicates and restarts at 1 in every section.
df %>%
  group_by(section) %>%
  mutate(desire = dense_rank(exhibit)) %>%
  # drop the grouping so later steps are not silently computed per section
  ungroup()
#  section exhibit desired_result desire
#1       1       a              1      1
#2       1       b              2      2
#3       1       c              3      3
#4       1       c              3      3
#5       2       a              1      1
#6       2       b              2      2
#7       2       b              2      2
#8       2       c              3      3

How to add a column that counts duplicates in sequence?

After thinking about this question, I think the following is the way to go: if you do not mind combining all program names into a single string per name, you can do the following. This is probably much better.

# Convert mydf to a data.table in place, glue each name's programs into one
# "-"-separated label, then count how many names share each label.
setDT(mydf)
mydf[, .(type = paste(program, collapse = "-")), by = name][
  , .(total = .N), by = type]

#         type total
#1:     ffp-TP     2
#2: TP-ffp-wfd     1

If you want to separate program names, you can do that with cSplit() from the splitstackshape package.

# Count how many names share each "-"-joined program label and keep the
# result in `temp`. Leftward `<-` keeps the assignment target visible at the
# start of the statement instead of hiding it after a multi-line chain.
temp <- setDT(mydf)[, list(type = paste(program, collapse = "-")), by = name][
  , list(total = .N), by = type]

# cSplit() (splitstackshape) breaks `type` at each "-" into type_1, type_2, ...
cSplit(temp, splitCols = "type", sep = "-")

#   total type_1 type_2 type_3
#1:     2    ffp     TP     NA
#2:     1     TP    ffp    wfd

The equivalent dplyr code is:

# Build one "-"-joined program label per name, then tally identical labels.
mydf %>%
  group_by(name) %>%
  summarise(type = paste(program, collapse = "-")) %>%
  count(type)

#        type     n
#       (chr) (int)
#1     ffp-TP     2
#2 TP-ffp-wfd     1

DATA

# Example input for the snippets above: 7 rows, 4 columns. Dates are kept as
# character strings and helperColumn as integers.
mydf <- data.frame(
  name = c("John", "John", "Carlos", "Carlos", "Carlos", "Jen", "Jen"),
  program = c("ffp", "TP", "TP", "ffp", "wfd", "ffp", "TP"),
  dateOfContact = c(
    "10/11/2014", "10/27/2014", "11/19/2015", "12/1/2015", "12/31/2015",
    "9/9/2014", "9/30/2014"
  ),
  helperColumn = c(2L, 2L, 3L, 3L, 3L, 2L, 2L),
  stringsAsFactors = FALSE
)

Sequence number for duplicate rows in r

One way to achieve this with dplyr:

library(dplyr)

df1 %>%
  # one group per distinct (a, b) combination
  dplyr::group_by(a, b) %>%
  # row_number() restarts at 1 inside every group, so it numbers the duplicates
  dplyr::mutate(duplicateID = dplyr::row_number()) %>%
  # remove the grouping so later steps operate on the whole table again
  dplyr::ungroup()

# A tibble: 10 x 3
   a         b  duplicateID
   <chr> <dbl>     <int>
 1 one     3.5       1
 2 one     3.5       2
 3 one     3.5       3
 4 one     2.5       1
 5 two     3.5       1
 6 two     3.5       2
 7 three   1         1
 8 four    2.2       1
 9 four    7         1
10 four    7         2

Group observations chronologically and by group R / data.table

data.table::rleid(): Consecutive runs of identical values belong to the same group

# Number each run of consecutive identical `track` values, restarting the
# count for every student.
dt[, tracker := rleid(track), by = .(student)]

    student year      track tracker
 1:       1 2001 Highschool       1
 2:       1 2002 Highschool       1
 3:       1 2003 Highschool       1
 4:       1 2004 Vocational       2
 5:       1 2005 Vocational       2
 6:       1 2006        Uni       3
 7:       1 2007 Vocational       4
 8:       1 2008 Vocational       4
 9:       2 2001 Vocational       1
10:       2 2002 Vocational       1
11:       2 2003 Highschool       2
12:       2 2004 Highschool       2
13:       2 2005 Highschool       2
14:       2 2006 Highschool       2
15:       2 2007 Vocational       3
16:       2 2008 Vocational       3

Without rleid() just for fun:

# A new run starts wherever `track` differs from the previous row. Padding the
# lag with the first value keeps row 1 from counting as a change; the running
# total of changes, plus one, is the run id.
dt[, tracker := 1L + cumsum(track != shift(track, fill = track[1L])),
   by = student]

Renumber groups sequentially

You can use cur_group_id().

library(dplyr)

# cur_group_id() yields the index of the current group, so every distinct
# ActivityID is mapped to a sequential id.
df %>%
  group_by(ActivityID) %>%
  mutate(ActivityID2 = cur_group_id()) %>%
  # drop the grouping so later steps do not silently run per ActivityID
  ungroup()

Count consecutive duplicates in a column

Use rle

# Run-length encode x, then keep the lengths of the runs whose value is -1.
x <- c(0L, 1L, 0L, -1L, -1L, 0L, 1L, -1L, -1L, -1L, 1L)
with(rle(x), lengths[values == -1])
#[1] 2 3

For all unique elements of x

# For every distinct run value, collect the lengths of all runs of that value.
# lapply() always returns a list; sapply() would collapse the result to a
# vector or matrix whenever every value happens to have the same number of
# runs, so the output shape would depend on the data.
with(rle(x), {
  run_values <- unique(values)
  setNames(
    lapply(run_values, function(v) lengths[values == v]),
    nm = run_values
  )
})
#$`0`
#[1] 1 1 1

#$`1`
#[1] 1 1 1

#$`-1`
#[1] 2 3

Add id by group which resets to 1 in R

We could use ?rleid from the data.table package.

library(data.table)
# Convert df to a data.table in place, then number the runs of gr2 within each
# gr1 so the id restarts at 1 for every gr1 value.
setDT(df)
df[, id := rleid(gr2), by = gr1]
> df
   gr1 gr2 id
1:   A   1  1
2:   A   1  1
3:   A   2  2
4:   B   3  1
5:   B   4  2
6:   B   4  2

ID measuring when variable takes on new value

You can use data.table's rleid:

# New id every time ID2_EVWIND changes from one row to the next.
with(df, data.table::rleid(ID2_EVWIND))
#[1]  1  1  1  2  2  2  2  2  3  3  4  4  4  4  4  5  6  6  6  6  7  7  7  8  8  8  8  8  9  9 10 10 10 11

Another option is match:

# Position of each value among the distinct values, in order of first appearance.
with(df, match(ID2_EVWIND, unique(ID2_EVWIND)))

Although both of them give the expected output in this case, their behavior differs when a value reappears later in the vector. Take this example:

x <- c(1, 1, 2, 3, 3, 1, 1)
# rleid() starts a new id at every change, so the trailing 1s get a fresh id (4)
data.table::rleid(x)
#[1] 1 1 2 3 3 4 4

# match() maps each value to its first occurrence, so the trailing 1s reuse id 1
match(x, unique(x))
#[1] 1 1 2 3 3 1 1

You can select the option based on your requirement.

Fill Down Column with SubGroup Totals (R)

We can use n_distinct from the dplyr package.

library(dplyr)

# Add a Count column holding, on every row, the number of distinct UniqueID
# values found in that row's Group.
dat2 <- dat %>%
  group_by(Group) %>%
  # mutate() (not summarise()) keeps all rows and repeats the per-group count
  mutate(Count = n_distinct(UniqueID)) %>%
  # remove the grouping so dat2 behaves as a plain tibble afterwards
  ungroup()
dat2
# # A tibble: 10 x 3
#    Group UniqueID Count
#    <chr> <chr>    <int>
#  1 Grp1  1-A          3
#  2 Grp1  1-B          3
#  3 Grp1  1-B          3
#  4 Grp1  1-C          3
#  5 Grp2  2-A          2
#  6 Grp2  2-A          2
#  7 Grp2  2-B          2
#  8 Grp3  3-A          1
#  9 Grp3  3-A          1
# 10 Grp3  3-A          1

Data

# Example input: ten rows over three groups, with repeated UniqueID values.
# Both columns are character vectors.
dat <- data.frame(
  Group = c(
    "Grp1", "Grp1", "Grp1", "Grp1",
    "Grp2", "Grp2", "Grp2",
    "Grp3", "Grp3", "Grp3"
  ),
  UniqueID = c(
    "1-A", "1-B", "1-B", "1-C",
    "2-A", "2-A", "2-B",
    "3-A", "3-A", "3-A"
  ),
  stringsAsFactors = FALSE
)

R - Add Column That Counts Sequentially Within Groups But Repeats for Duplicates