R - add column that counts sequentially within groups but repeats for duplicates
dense_rank
it is
library(dplyr)
# Rank `exhibit` within each `section`. dense_rank() gives tied values the
# same rank and leaves no gaps, so duplicates repeat the counter instead of
# advancing it.
df %>%
  group_by(section) %>%
  mutate(desire = dense_rank(exhibit)) %>%
  # Drop the grouping so later steps are not silently computed per section.
  ungroup()
# section exhibit desired_result desire
#1 1 a 1 1
#2 1 b 2 2
#3 1 c 3 3
#4 1 c 3 3
#5 2 a 1 1
#6 2 b 2 2
#7 2 b 2 2
#8 2 c 3 3
How to add a column that counts duplicates in sequence?
After thinking about this question, I think the following would be the way to go. If you do not mind combining all program names, you could do the following. This is probably much better.
# Step 1: per name, paste that person's programs into one "-"-joined label.
# Step 2: count how many names ended up with each label.
setDT(mydf)[
  , .(type = paste(program, collapse = "-")), by = name
][
  , .(total = .N), by = type
]
# type total
#1: ffp-TP 2
#2: TP-ffp-wfd 1
If you want to separate the program names into their own columns, you can do that with cSplit() from the splitstackshape package.
# Collapse each name's programs into one "-"-separated label and count how
# many names share each label; keep the result so it can be split afterwards.
temp <- setDT(mydf)[, list(type = paste(program, collapse = "-")), by = name][
  , list(total = .N), by = type]
# Split the combined label into one column per program (type_1, type_2, ...).
cSplit(temp, splitCols = "type", sep = "-")
# total type_1 type_2 type_3
#1: 2 ffp TP NA
#2: 1 TP ffp wfd
The equivalent dplyr code is:
# Build one "-"-joined program label per name, then tally identical labels.
mydf %>%
  group_by(name) %>%
  summarise(type = paste(program, collapse = "-")) %>%
  count(type)
# type n
# (chr) (int)
#1 ffp-TP 2
#2 TP-ffp-wfd 1
DATA
# Example data: one row per contact, with the program and the contact date.
# Strings are kept as character vectors; helperColumn is integer.
mydf <- data.frame(
  name = c("John", "John", "Carlos", "Carlos", "Carlos", "Jen", "Jen"),
  program = c("ffp", "TP", "TP", "ffp", "wfd", "ffp", "TP"),
  dateOfContact = c(
    "10/11/2014", "10/27/2014", "11/19/2015", "12/1/2015", "12/31/2015",
    "9/9/2014", "9/30/2014"
  ),
  helperColumn = c(2L, 2L, 3L, 3L, 3L, 2L, 2L),
  stringsAsFactors = FALSE
)
Sequence number for duplicate rows in r
One way to achieve this with dplyr:
library(dplyr)
# Number repeated (a, b) combinations 1, 2, 3, ... in row order.
df1 %>%
  # every distinct (a, b) pair becomes its own group
  group_by(a, b) %>%
  # row_number() restarts at 1 inside each group
  mutate(duplicateID = row_number()) %>%
  # remove the grouping so later verbs see the whole table again
  ungroup()
# A tibble: 10 x 3
a b duplicateID
<chr> <dbl> <int>
1 one 3.5 1
2 one 3.5 2
3 one 3.5 3
4 one 2.5 1
5 two 3.5 1
6 two 3.5 2
7 three 1 1
8 four 2.2 1
9 four 7 1
10 four 7 2
Group observations chronologically and by group R / data.table
data.table::rleid()
: Consecutive runs of identical values belong to the same group
# Per student, give each run of consecutive identical `track` values its own id.
dt[, tracker := rleid(track), by = "student"]
student year track tracker
1: 1 2001 Highschool 1
2: 1 2002 Highschool 1
3: 1 2003 Highschool 1
4: 1 2004 Vocational 2
5: 1 2005 Vocational 2
6: 1 2006 Uni 3
7: 1 2007 Vocational 4
8: 1 2008 Vocational 4
9: 2 2001 Vocational 1
10: 2 2002 Vocational 1
11: 2 2003 Highschool 2
12: 2 2004 Highschool 2
13: 2 2005 Highschool 2
14: 2 2006 Highschool 2
15: 2 2007 Vocational 3
16: 2 2008 Vocational 3
Without rleid()
just for fun:
# Compare each `track` with the previous row's value (the first row is compared
# with itself, so it never counts as a change); the running total of changes,
# offset by one, is the run id within each student.
dt[
  , tracker := 1L + cumsum(track != shift(track, fill = track[1L])),
  by = student
]
Renumber groups sequentially
You can use cur_group_id()
.
library(dplyr)
# Renumber the ActivityID groups with cur_group_id(), which returns the
# integer index of the current group.
df %>%
  group_by(ActivityID) %>%
  mutate(ActivityID2 = cur_group_id()) %>%
  # Remove the grouping so downstream verbs operate on the whole table.
  ungroup()
Count consecutive duplicates in a column
Use rle
x <- c(0L, 1L, 0L, -1L, -1L, 0L, 1L, -1L, -1L, -1L, 1L)
# Run-length encode x, then keep the lengths of the runs whose value is -1.
local({
  runs <- rle(x)
  runs$lengths[runs$values == -1]
})
#[1] 2 3
For all unique
elements of x
# For every distinct value in x, collect the lengths of all its runs.
# lapply() always returns a list; sapply() would collapse the result to a
# vector or matrix whenever every value happens to have the same number of
# runs, so the output shape would depend on the data.
with(rle(x), {
  run_values <- unique(values)
  setNames(
    lapply(run_values, function(v) lengths[values == v]),
    nm = run_values
  )
})
#$`0`
#[1] 1 1 1
#$`1`
#[1] 1 1 1
#$`-1`
#[1] 2 3
Add id by group which resets to 1 in R
We could use ?rleid
from the data.table
package.
library(data.table)
# Convert df to a data.table by reference, then number the runs of gr2
# separately within each gr1 group.
setDT(df)
df[, id := rleid(gr2), by = gr1]
> df
gr1 gr2 id
1: A 1 1
2: A 1 1
3: A 2 2
4: B 3 1
5: B 4 2
6: B 4 2
ID measuring when variable takes on new value
You can use data.table
rleid
:
# rleid() starts a new id each time the value differs from the previous element.
data.table::rleid(df$ID2_EVWIND)
#[1] 1 1 1 2 2 2 2 2 3 3 4 4 4 4 4 5 6 6 6 6 7 7 7 8 8 8 8 8 9 9 10 10 10 11
Another option is match
:
# Position of each value among the unique values (in order of first
# appearance), so a value that reappears later gets its earlier id again.
match(df$ID2_EVWIND, unique(df$ID2_EVWIND))
In this case both approaches give the expected output, but they behave differently when a value reappears after other values. Take this example:
# The value 1 reappears at the end, which is where the two approaches differ.
x <- c(1, 1, 2, 3, 3, 1, 1)
# rleid(): the trailing 1s form a new run and get a fresh id (4).
data.table::rleid(x)
#[1] 1 1 2 3 3 4 4
# match(): the trailing 1s map back to the first unique value (id 1).
match(x, unique(x))
#[1] 1 1 2 3 3 1 1
You can select the option based on your requirement.
Fill Down Column with SubGroup Totals (R)
We can use n_distinct
from the dplyr
package.
library(dplyr)
# For each Group, count the distinct UniqueID values and repeat that count on
# every row of the group.
dat2 <- dat %>%
group_by(Group) %>%
mutate(Count = n_distinct(UniqueID)) %>%
# Drop the grouping before the result is used further.
ungroup()
# Print the result.
dat2
# # A tibble: 10 x 3
# Group UniqueID Count
# <chr> <chr> <int>
# 1 Grp1 1-A 3
# 2 Grp1 1-B 3
# 3 Grp1 1-B 3
# 4 Grp1 1-C 3
# 5 Grp2 2-A 2
# 6 Grp2 2-A 2
# 7 Grp2 2-B 2
# 8 Grp3 3-A 1
# 9 Grp3 3-A 1
# 10 Grp3 3-A 1
Data
# Example data: ten rows in three groups, with repeated ids inside each group.
# Both columns are character vectors.
dat <- data.frame(
  Group = rep(c("Grp1", "Grp2", "Grp3"), times = c(4L, 3L, 3L)),
  UniqueID = c(
    "1-A", "1-B", "1-B", "1-C",
    "2-A", "2-A", "2-B",
    "3-A", "3-A", "3-A"
  ),
  stringsAsFactors = FALSE
)
Related Topics
Remove Backslashes from Character String
Find Duplicated Rows (Based on 2 Columns) in Data Frame in R
Issue with Geom_Text When Using Position_Dodge
How to Add Different Lines for Facets
Ggplot Separate Legend and Plot
The Same Width of the Bars in Geom_Bar(Position = "Dodge")
Can't Print to PDF Ggplot Charts
Fill and Border Colour in Geom_Point (Scale_Colour_Manual) in Ggplot
Min for Each Row in a Data Frame
R: Data.Table Cross-Join Not Working
How to Order the Months Chronologically in Ggplot2 Short of Writing the Months Out
R Compare Multiple Values with Vector and Return Vector
Using Multiple Criteria in Subset Function and Logical Operators
What Is the Algorithm Behind R Core's 'Split' Function