Pivot_Wider, Count Number of Occurrences

Pivot_wider and count number of occurrences retaining all other columns

You can count them first:

library(dplyr)
library(tidyr)

# Example data: three products, each with a type and three recorded issues.
df <- data.frame(
  product = rep(c(1, 2, 3), each = 3),
  type = c(rep("a", 3), rep("b", 3), rep("a", 3)),
  issue = c("fall", "fall", "trap",
            "trap", "jump", "fall",
            "trap", "jump", "open")
)

# Tally each product/type/issue combination, then move the issue values
# into their own columns; combinations that never occurred come out as NA.
df %>%
  count(product, type, issue) %>%
  pivot_wider(names_from = issue, values_from = n)
#> # A tibble: 3 × 6
#> product type fall trap jump open
#> <dbl> <chr> <int> <int> <int> <int>
#> 1 1 a 2 1 NA NA
#> 2 2 b 1 1 1 NA
#> 3 3 a NA 1 1 1

Created on 2022-05-25 by the reprex package (v2.0.1)

R: count the number of occurrences

Let me answer my own question; maybe it helps someone else too:

#' Most frequent comma-separated token.
#'
#' @param x A character string of comma-separated values, e.g. "a,b,a".
#' @return The single most frequent token (ties broken by table order).
MyMode <- function(x) {
  # strsplit() returns a *list*; flatten it before tabulating, otherwise
  # table() receives a list and fails / misbehaves.
  tokens <- unlist(strsplit(x, ",", fixed = TRUE))
  # Sort counts in decreasing order instead of the negate-and-sort trick.
  names(sort(table(tokens), decreasing = TRUE))[1]
}

then apply it:

 # vapply() yields a plain character column (one mode per row); lapply()
 # would have stored an awkward list-column.  NOTE(review): the original
 # read from test$apps — presumably a typo for df$apps given the prose.
 df$most <- vapply(as.character(df$apps), MyMode, character(1), USE.NAMES = FALSE)

pivot_wider dynamically by number of occurrences

You can count number of rows for every Product and Client and sort them. Create a column names for each Product based on their frequency and cast the data in wide format.

library(dplyr)

df %>%
  # frequency of every Product/Client pair, most frequent first
  count(Product, Client, sort = TRUE) %>%
  group_by(Product) %>%
  # rank-based column labels: n1_client, n2_client, ... per Product
  mutate(n = paste0("n", row_number(), "_client")) %>%
  # one row per Product, one column per frequency rank
  tidyr::pivot_wider(names_from = n, values_from = Client)

# Product n1_client n2_client
# <chr> <chr> <chr>
#1 A C1 C0
#2 B C2 C1
#3 C C0 C1

How to use group_by() and summarize() to count the occurrences of data points?

count needs a data.frame/tibble as input, not a vector. To make this work, we can reshape to 'long' format with pivot_longer, apply count on the columns, and then use adorn_totals to get the total column.

library(dplyr)
library(tidyr)
library(janitor)
p %>%
  # reshape to long form: one row per (column name, value) pair
  pivot_longer(cols = everything()) %>%
  # occurrences of each value within each original column
  count(name, value) %>%
  # back to wide: one column per value, absent combinations become 0
  pivot_wider(names_from = value, values_from = n, values_fill = 0) %>%
  # append a row-wise Total column
  janitor::adorn_totals("col")

-output

  name A B C D E Total
x 2 2 1 0 0 5
y 2 2 0 1 0 5
z 0 2 1 1 1 5

Tidyr how to spread into count of occurrence

These are a few of many ways to go about it:

1) With library dplyr, you can simply group things and count into the format needed:

library(dplyr)
# Per name, count how many results are "N" and how many are "Y".
other %>%
  group_by(name) %>%
  summarise(
    N = sum(result == 'N'),
    Y = sum(result == 'Y')
  )
Source: local data frame [4 x 3]

name N Y
<fctr> <int> <int>
1 a 0 2
2 b 1 0
3 c 0 1
4 d 1 0

2) You can use a combination of table and tidyr spread as follows:

library(tidyr)
# table() cross-tabulates name x result; spread() then moves the result
# levels into their own count columns.
other %>%
  table() %>%
  as.data.frame() %>%
  spread(result, Freq)
name N Y
1 a 0 2
2 b 1 0
3 c 0 1
4 d 1 0

3) You can use a combination of dplyr and tidyr to do as follows:

library(dplyr)
library(tidyr)
# Count each name/result pair, then widen so each result level becomes a
# column; pairs that never occur are filled with 0.
other %>%
  count(name, result) %>%
  spread(result, n, fill = 0)
Source: local data frame [4 x 3]
Groups: name [4]

name N Y
<fctr> <dbl> <dbl>
1 a 0 2
2 b 1 0
3 c 0 1
4 d 1 0

Count occurrences of factors across multiple columns in grouped dataframe

You can stack col1 & col2 together, count the number of each combination, and then transform the table to a wide form.

library(dplyr)
library(tidyr)

df %>%
  # stack col1 and col2 into name/value pairs
  pivot_longer(col1:col2) %>%
  # count each grp / column / value combination
  count(grp, name, value) %>%
  # id_cols must be named: passing `grp` positionally lands in `...` and
  # errors in tidyr >= 1.3 ("arguments after ... must be named")
  pivot_wider(id_cols = grp, names_from = c(name, value), names_sort = TRUE,
              values_from = n, values_fill = 0)

# A tibble: 3 x 6
grp col1_A col1_B col2_B col2_C col2_D
<chr> <int> <int> <int> <int> <int>
1 a 1 2 2 0 1
2 b 2 0 0 2 0
3 c 1 2 0 2 1

A base solution (Thank @GKi to refine the code):

# stack() gives (values, ind); pasting ind before values yields labels
# like "col1A", which table() then cross-tabulates against grp.
table(cbind(df["grp"], col = with(stack(df[-1]), paste0(ind, values))))

col
grp col1A col1B col2B col2C col2D
a 1 2 2 0 1
b 2 0 0 2 0
c 1 2 0 2 1

Pivot rows into columns with values of counts for each measurement R

Newest solution

library(data.table) #v 1.9.6+
# indx = number of times each complete row occurs in df1
setDT(df1)[, indx := .N, by = names(df1)
# keep only duplicated rows (indx > 1); within each ID/TARG_AVG group,
# keep the group only if it spans more than one distinct Measurement
][indx > 1, if(uniqueN(Measurement) > 1) .SD, by = .(ID, TARG_AVG)]
# ID TARG_AVG Measurement indx
# 1: A 2.1 Len 3
# 2: A 2.1 Len 3
# 3: A 2.1 Len 3
# 4: A 2.1 Ht 2
# 5: A 2.1 Ht 2
# 6: A 2.5 Ht 2
# 7: A 2.5 Ht 2
# 8: A 2.5 Dep 2
# 9: A 2.5 Dep 2
# 10: B 3.1 Dep 2
# 11: B 3.1 Dep 2
# 12: B 3.1 Len 2
# 13: B 3.1 Len 2
# 14: B 3.3 Ht 2
# 15: B 3.3 Ht 2
# 16: B 3.3 Brt 2
# 17: B 3.3 Brt 2

Or the dplyr equivalent

df1 %>%
# keep rows whose (ID, TARG_AVG, Measurement) combination repeats
group_by(ID, TARG_AVG, Measurement) %>%
filter(n() > 1) %>%
# then keep only (ID, TARG_AVG) groups with more than one Measurement
group_by(ID, TARG_AVG) %>%
filter(n_distinct(Measurement) > 1)

Older solution

library(data.table)
## dcast the data to wide format: one column per Measurement level,
## cell values are occurrence counts (no need in total)
res <- dcast(df1, ID + TARG_AVG ~ Measurement)
## filter by at least 2 incidents of at least length 2
## (res[-(1:2)] drops the ID and TARG_AVG id columns before counting)
res <- res[rowSums(res[-(1:2)] > 1) > 1,]
## melt the data back to long format and filter again by at least 2 incidents
res <- melt(setDT(res), id = 1:2)[value > 1]
## Expand the data back: repeat each row `value` times to restore
## one row per original observation
res[, .SD[rep(.I, value)]]

The solution to the original question

Here's a possible solution using reshape2

1st step

library(reshape2)
# margins = "Measurement" appends an "(all)" column holding row totals
res <- dcast(data = df1, formula = ID + TARG_AVG ~ Measurement,
             margins = "Measurement")

2nd step

# keep rows with more than two measurements in total; [[ is the idiomatic
# way to index the "(all)" margin column by its literal name
res <- res[res[["(all)"]] > 2, ]

3rd step

library(data.table)
# keep only (ID, TARG_AVG) groups that occur more than twice
setDT(df1)[, if(.N > 2) .SD, by = .(ID, TARG_AVG)]

Counting number of values based on conditions in R

library(dplyr)
library(tidyr)
your_data %>%
  group_by(Date) %>%
  # count() respects the existing Date grouping, so this tallies each
  # Date/Currency pair
  count(Currency) %>%
  pivot_wider(
    names_from = Currency,
    # '{.value.count}' is not a valid glue spec (it would error at run
    # time); interpolate the Currency value to get e.g. "EUR_count"
    names_glue = '{Currency}_count',
    values_from = n
  )


Related Topics



Leave a reply



Submit