R - Add Column That Counts Sequentially Within Groups But Repeats for Duplicates

R - add column that counts sequentially within groups but repeats for duplicates

Use dplyr's dense_rank(): it ranks the values within each group without gaps, so duplicates share the same number.

library(dplyr)
# Number the distinct `exhibit` values within each `section`.
# dense_rank() leaves no gaps, so duplicated values share the same number.
df %>%
  group_by(section) %>%
  mutate(desire = dense_rank(exhibit)) %>%
  # drop the grouping so later verbs are not silently applied per section
  ungroup()
# section exhibit desired_result desire
#1 1 a 1 1
#2 1 b 2 2
#3 1 c 3 3
#4 1 c 3 3
#5 2 a 1 1
#6 2 b 2 2
#7 2 b 2 2
#8 2 c 3 3

How to add a column that counts duplicates in sequence?

After thinking about this question, I think the following would be the way to go. If you do not mind combining all program names, you could do the following. This is probably much better.

# Step 1: collapse each person's programs into one "-"-separated key.
# Step 2: count how many people share each key.
setDT(mydf)[
  , .(type = paste(program, collapse = "-")), by = name
][
  , .(total = .N), by = type
]

# type total
#1: ffp-TP 2
#2: TP-ffp-wfd 1

If you want to separate program names, you can do that with cSplit() from the splitstackshape package.

# Same collapse-and-count as before, kept in `temp` so it can be split.
temp <- setDT(mydf)[
  , .(type = paste(program, collapse = "-")), by = name
][
  , .(total = .N), by = type
]

# Spread the "-"-separated key into one column per program.
cSplit(temp, splitCols = "type", sep = "-")

# total type_1 type_2 type_3
#1: 2 ffp TP NA
#2: 1 TP ffp wfd

The equivalent dplyr code is:

# One row per name with its programs pasted together, then a tally of
# how often each pasted combination occurs.
mydf %>%
  group_by(name) %>%
  summarise(type = paste(program, collapse = "-")) %>%
  count(type)

# type n
# (chr) (int)
#1 ffp-TP 2
#2 TP-ffp-wfd 1

DATA

# Example data: one row per (name, program) contact.
mydf <- data.frame(
  name = c("John", "John", "Carlos", "Carlos", "Carlos", "Jen", "Jen"),
  program = c("ffp", "TP", "TP", "ffp", "wfd", "ffp", "TP"),
  dateOfContact = c(
    "10/11/2014", "10/27/2014", "11/19/2015", "12/1/2015",
    "12/31/2015", "9/9/2014", "9/30/2014"
  ),
  helperColumn = c(2L, 2L, 3L, 3L, 3L, 2L, 2L),
  stringsAsFactors = FALSE
)

Sequence number for duplicate rows in r

One way to achieve this with dplyr:

library(dplyr)

# Number the repeats of every (a, b) combination: 1 for the first
# occurrence, 2 for the second, and so on.
dplyr::group_by(df1, a, b) %>%
  # row_number() restarts in each group because the data is grouped
  dplyr::mutate(duplicateID = dplyr::row_number()) %>%
  # remove the grouping so it does not affect downstream steps
  dplyr::ungroup()

# A tibble: 10 x 3
a b duplicateID
<chr> <dbl> <int>
1 one 3.5 1
2 one 3.5 2
3 one 3.5 3
4 one 2.5 1
5 two 3.5 1
6 two 3.5 2
7 three 1 1
8 four 2.2 1
9 four 7 1
10 four 7 2

Group observations chronologically and by group R / data.table

data.table::rleid(): Consecutive runs of identical values belong to the same group

# Within each student, give every consecutive run of the same track its own id.
dt[, tracker := rleid(track), by = "student"]

student year track tracker
1: 1 2001 Highschool 1
2: 1 2002 Highschool 1
3: 1 2003 Highschool 1
4: 1 2004 Vocational 2
5: 1 2005 Vocational 2
6: 1 2006 Uni 3
7: 1 2007 Vocational 4
8: 1 2008 Vocational 4
9: 2 2001 Vocational 1
10: 2 2002 Vocational 1
11: 2 2003 Highschool 2
12: 2 2004 Highschool 2
13: 2 2005 Highschool 2
14: 2 2006 Highschool 2
15: 2 2007 Vocational 3
16: 2 2008 Vocational 3

Without rleid() just for fun:

# Compare each track with the previous one (the first row is compared with
# itself, so it never counts as a change) and accumulate the changes.
dt[
  ,
  tracker := 1L + cumsum(
    track != shift(track, n = 1L, fill = track[1L], type = "lag")
  ),
  by = student
]

Renumber groups sequentially

You can use cur_group_id().

library(dplyr)

# Replace arbitrary ActivityID values with consecutive group numbers.
df %>%
  group_by(ActivityID) %>%
  # cur_group_id() returns the index of the group currently being processed
  mutate(ActivityID2 = cur_group_id()) %>%
  # drop the grouping so later verbs are not silently applied per ActivityID
  ungroup()

Count consecutive duplicates in a column

Use rle

x <- c(0L, 1L, 0L, -1L, -1L, 0L, 1L, -1L, -1L, -1L, 1L)
# rle() describes x as runs (values + lengths); keep the lengths of the
# runs whose value is -1.
with(rle(x), lengths[values == -1L])
#[1] 2 3

For all unique elements of x

# For every distinct value in `x`, collect the lengths of its runs.
# lapply() always returns a list; sapply() would collapse the result to a
# vector or matrix whenever every value happened to have the same number
# of runs.
with(rle(x), {
  run_values <- unique(values)
  setNames(
    lapply(run_values, function(v) lengths[values == v]),
    nm = run_values
  )
})
#$`0`
#[1] 1 1 1

#$`1`
#[1] 1 1 1

#$`-1`
#[1] 2 3

Add id by group which resets to 1 in R

We could use rleid() from the data.table package (see ?rleid).

library(data.table)
# Convert df to a data.table by reference, then number the runs of gr2
# separately inside each gr1 group.
setDT(df)
df[, id := rleid(gr2), by = gr1]
> df
gr1 gr2 id
1: A 1 1
2: A 1 1
3: A 2 2
4: B 3 1
5: B 4 2
6: B 4 2

ID measuring when variable takes on new value

You can use rleid() from data.table:

# Run-length id: the counter goes up by one each time the value differs
# from the previous element.
data.table::rleid(df$ID2_EVWIND)
#[1] 1 1 1 2 2 2 2 2 3 3 4 4 4 4 4 5 6 6 6 6 7 7 7 8 8 8 8 8 9 9 10 10 10 11

Another option is match :

# Id by order of first appearance: a value that reappears later gets its
# earlier id back.
match(df$ID2_EVWIND, unique(df$ID2_EVWIND))

In this case both give the expected output, but they behave differently when a value reappears after a gap. Take this example:

# The value 1 occurs in two separate runs (positions 1-2 and 6-7).
x <- c(1, 1, 2, 3, 3, 1, 1)
# rleid() numbers runs, so the second run of 1s gets a new id (4).
data.table::rleid(x)
#[1] 1 1 2 3 3 4 4

# match() numbers distinct values, so the second run of 1s reuses id 1.
match(x, unique(x))
#[1] 1 1 2 3 3 1 1

You can select the option based on your requirement.

Fill Down Column with SubGroup Totals (R)

We can use n_distinct from the dplyr package.

library(dplyr)

# Attach to every row the number of distinct UniqueID values in its Group.
dat2 <- group_by(dat, Group) %>%
  mutate(Count = n_distinct(UniqueID)) %>%
  # return an ungrouped result
  ungroup()
dat2
# # A tibble: 10 x 3
# Group UniqueID Count
# <chr> <chr> <int>
# 1 Grp1 1-A 3
# 2 Grp1 1-B 3
# 3 Grp1 1-B 3
# 4 Grp1 1-C 3
# 5 Grp2 2-A 2
# 6 Grp2 2-A 2
# 7 Grp2 2-B 2
# 8 Grp3 3-A 1
# 9 Grp3 3-A 1
# 10 Grp3 3-A 1

Data

# Example data: ten rows across three groups, with repeated UniqueID values.
dat <- data.frame(
  Group = rep(c("Grp1", "Grp2", "Grp3"), times = c(4L, 3L, 3L)),
  UniqueID = c(
    "1-A", "1-B", "1-B", "1-C",
    "2-A", "2-A", "2-B",
    "3-A", "3-A", "3-A"
  ),
  stringsAsFactors = FALSE
)


Related Topics



Leave a reply



Submit