Pivot_wider and count number of occurrences retaining all other columns
You can count them first:
library(dplyr)
library(tidyr)
# Example data: one row per reported issue, tagged with its product and type.
df <- data.frame(
  product = rep(c(1, 2, 3), each = 3),
  type = rep(c("a", "b", "a"), each = 3),
  issue = c(
    "fall", "fall", "trap",
    "trap", "jump", "fall",
    "trap", "jump", "open"
  )
)
# Tally every product/type/issue combination, then give each issue its own
# count column. Combinations that never occur are left as NA.
df %>%
  count(product, type, issue, name = "n") %>%
  pivot_wider(names_from = "issue", values_from = "n")
#> # A tibble: 3 × 6
#> product type fall trap jump open
#> <dbl> <chr> <int> <int> <int> <int>
#> 1 1 a 2 1 NA NA
#> 2 2 b 1 1 1 NA
#> 3 3 a NA 1 1 1
Created on 2022-05-25 by the reprex package (v2.0.1)
R: count the number of occurrences
Let me answer my question, maybe it helps someone else too:
# Return the most frequent item in comma-separated text.
#
# x: a character vector (or something coercible to one) whose elements hold
#    comma-separated items, e.g. "a,b,a". When x has several elements, their
#    items are pooled before counting.
# Returns: a length-one character vector with the most frequent item, or
#    NA_character_ when there is nothing to count. On a tie, the item that
#    comes first in table()'s name order is returned.
MyMode <- function(x) {
  # strsplit() returns a list; flatten it so table() counts the items instead
  # of cross-tabulating one factor per list element.
  items <- unlist(
    strsplit(as.character(x), ",", fixed = TRUE),
    use.names = FALSE
  )
  counts <- table(items)
  if (length(counts) == 0L) {
    return(NA_character_)
  }
  # which.max() picks the first maximum, so ties resolve to the earliest name.
  names(counts)[which.max(counts)]
}
then apply it:
# Apply MyMode() to every entry of test$apps. vapply() produces a plain
# character column, where lapply() would have produced a list column; entries
# for which MyMode() returns nothing become NA.
# NOTE(review): this reads from `test` but writes to `df` -- confirm that both
# data frames exist and have the same number of rows.
df$most <- vapply(
  as.character(test$apps),
  function(apps) {
    top <- MyMode(apps)
    if (length(top) == 0L) NA_character_ else top
  },
  character(1),
  USE.NAMES = FALSE
)
pivot_wider dynamically by number of occurrences
You can count
the number of rows for every Product
and Client
and sort them. Then create column names for each Product
based on their frequency and cast the data to wide format.
library(dplyr)
# Rank each Product's clients by how often they occur, then give every rank
# its own column (n1_client holds the most frequent client, n2_client the
# next, and so on).
df %>%
  count(Product, Client, sort = TRUE) %>%
  group_by(Product) %>%
  # Overwriting `n` swaps the raw count for the rank label, so the count does
  # not survive as an extra identifying column in pivot_wider().
  mutate(n = sprintf("n%d_client", row_number())) %>%
  # Drop the grouping so later steps do not silently inherit it.
  ungroup() %>%
  tidyr::pivot_wider(names_from = n, values_from = Client)
# Product n1_client n2_client
# <chr> <chr> <chr>
#1 A C1 C0
#2 B C2 C1
#3 C C0 C1
How to use group_by() and summarize() to count the occurrences of datapoints?
count
needs data.frame/tibble
as input and not a vector. To make this work, we may need to reshape to 'long' format with pivot_longer
and apply the count
on the columns, and then use adorn_totals
to get the total column
library(dplyr)
library(tidyr)
library(janitor)
# Stack every column of `p` into name/value pairs, count each pair, spread the
# values back out as columns (0 where a pair never occurs), and finish with
# janitor's column of totals.
p %>%
  pivot_longer(everything(), names_to = "name", values_to = "value") %>%
  count(name, value) %>%
  pivot_wider(
    names_from = value,
    values_from = n,
    values_fill = 0
  ) %>%
  janitor::adorn_totals(where = "col")
-output
name A B C D E Total
x 2 2 1 0 0 5
y 2 2 0 1 0 5
z 0 2 1 1 1 5
Tidyr how to spread into count of occurrence
These are a few ways of many to go about it:
1) With library dplyr
, you can simply group things and count into the format needed:
library(dplyr)
# For each name, count how many results equal "N" and how many equal "Y".
other %>%
  group_by(name) %>%
  summarise(
    N = sum(result == "N"),
    Y = sum(result == "Y")
  )
Source: local data frame [4 x 3]
name N Y
<fctr> <int> <int>
1 a 0 2
2 b 1 0
3 c 0 1
4 d 1 0
2) You can use a combination of table
and tidyr
spread as follows:
library(tidyr)
# Cross-tabulate `other`, flatten the table to one row per combination with
# its Freq, then give each `result` value its own column.
spread(
  data = as.data.frame(table(other)),
  key = result,
  value = Freq
)
name N Y
1 a 0 2
2 b 1 0
3 c 0 1
4 d 1 0
3) You can use a combination of dplyr
and tidyr
to do as follows:
library(dplyr)
library(tidyr)
# Count each name/result pair, then spread the results into columns, filling
# pairs that never occur with 0.
other %>%
  count(name, result) %>%
  spread(key = result, value = n, fill = 0)
Source: local data frame [4 x 3]
Groups: name [4]
name N Y
<fctr> <dbl> <dbl>
1 a 0 2
2 b 1 0
3 c 0 1
4 d 1 0
Count occurrences of factors across multiple columns in grouped dataframe
You can stack col1
& col2
together, count the number of each combination, and then transform the table to a wide form.
library(dplyr)
library(tidyr)
# Stack col1 and col2 into name/value pairs, count every grp/name/value
# combination, then widen to one row per grp with a column for each
# name_value pair (0 where a combination never occurs).
df %>%
  pivot_longer(col1:col2) %>%
  count(grp, name, value) %>%
  pivot_wider(
    # id_cols is named explicitly: passing it by position is deprecated in
    # recent tidyr releases.
    id_cols = grp,
    names_from = c(name, value),
    names_sort = TRUE,
    values_from = n,
    values_fill = 0
  )
# A tibble: 3 x 6
grp col1_A col1_B col2_B col2_C col2_D
<chr> <int> <int> <int> <int> <int>
1 a 1 2 2 0 1
2 b 2 0 0 2 0
3 c 1 2 0 2 1
A base
solution (thanks to @GKi for refining the code):
# Stack every column except the first, glue each column name onto its value
# (e.g. "col1A"), pair that label with grp, and cross-tabulate the two.
with(
  stack(df[-1]),
  table(cbind(df["grp"], col = paste0(ind, values)))
)
col
grp col1A col1B col2B col2C col2D
a 1 2 2 0 1
b 2 0 0 2 0
c 1 2 0 2 1
Pivot rows into columns with values of counts for each measurement R
Newest solution
library(data.table) #v 1.9.6+
# Step 1: setDT() turns df1 into a data.table, and `indx` is added as the
# number of rows (.N) in each group formed by all of df1's columns. The
# grouping uses names(df1), which is written before `indx` appears in the call.
# Step 2: keep only rows with indx > 1; then, within each ID/TARG_AVG group of
# the remaining rows, return the rows (.SD) only when more than one distinct
# Measurement is present.
setDT(df1)[, indx := .N, by = names(df1)
][indx > 1, if(uniqueN(Measurement) > 1) .SD, by = .(ID, TARG_AVG)]
# ID TARG_AVG Measurement indx
# 1: A 2.1 Len 3
# 2: A 2.1 Len 3
# 3: A 2.1 Len 3
# 4: A 2.1 Ht 2
# 5: A 2.1 Ht 2
# 6: A 2.5 Ht 2
# 7: A 2.5 Ht 2
# 8: A 2.5 Dep 2
# 9: A 2.5 Dep 2
# 10: B 3.1 Dep 2
# 11: B 3.1 Dep 2
# 12: B 3.1 Len 2
# 13: B 3.1 Len 2
# 14: B 3.3 Ht 2
# 15: B 3.3 Ht 2
# 16: B 3.3 Brt 2
# 17: B 3.3 Brt 2
Or the dplyr
equivalent
# Keep measurements recorded at least twice for an ID/TARG_AVG pair, then keep
# only the pairs that still have at least two different measurements.
df1 %>%
  group_by(ID, TARG_AVG, Measurement) %>%
  filter(n() >= 2) %>%
  group_by(ID, TARG_AVG) %>%
  filter(n_distinct(Measurement) >= 2)
Older solution
library(data.table)
## Cast to wide form: one row per ID/TARG_AVG pair and one column per
## Measurement (no totals requested). No aggregation function is given, so
## the cells are presumably counts -- confirm against dcast's default.
res <- dcast(df1, ID + TARG_AVG ~ Measurement)
## Keep rows where more than one of the measurement columns (every column
## after the first two) holds a value greater than 1
res <- res[rowSums(res[-(1:2)] > 1) > 1,]
## Melt back to long form, using the first two columns as ids, and keep
## only the entries whose value is greater than 1
res <- melt(setDT(res), id = 1:2)[value > 1]
## Expand the data back: repeat each row `value` times
res[, .SD[rep(.I, value)]]
The solution to the original question
Here's a possible solution using reshape2
1st step
library(reshape2)
# Cast to one row per ID/TARG_AVG pair with a column per Measurement, and ask
# for a margin over Measurement (the "(all)" column used in the next step).
res <- dcast(
  data = df1,
  formula = ID + TARG_AVG ~ Measurement,
  margins = "Measurement"
)
2nd step
# Keep the rows whose "(all)" column is greater than 2.
res <- res[res[["(all)"]] > 2, ]
3rd step
library(data.table)
# Return the rows of every ID/TARG_AVG group that has at least three rows.
setDT(df1)[
  ,
  if (.N >= 3L) .SD,
  by = c("ID", "TARG_AVG")
]
Counting number of values based on conditions in R
library(dplyr)
library(tidyr)
# Count rows per Date and Currency, then give each Currency its own
# "<Currency>.count" column, one row per Date.
your_data %>%
  # Counting by both columns directly avoids leaving the result grouped.
  count(Date, Currency) %>%
  pivot_wider(
    names_from = Currency,
    # A glue template can only reference `.value` and the names_from columns.
    # The previous '{.value.count}' named a variable that does not exist.
    # NOTE(review): "<Currency>.count" is the presumed intended naming.
    names_glue = "{Currency}.count",
    values_from = n
  )
Related Topics
How to Efficiently Read the First Character from Each Line of a Text File
R: Replacing Foreign Characters in a String
How to Ensure That a Partition Has Representative Observations from Each Level of a Factor
If Column Contains String Then Enter Value for That Row
Text Color Based on Contrast Against Background
Ggplot2 One Line Per Each Row Dataframe
Adding Percentages to a Grouped Barchart Columns in Ggplot2
Separate a Column into 2 Columns at the Last Underscore in R
Data Difference in 'As.Posixct' with Excel
Dual Y Axis (Second Axis) Use in Ggplot2
Change a Column from Birth Date to Age in R
Ggplot2: How to Transparently Shade Alternate Days on a Plot
Constructing a Named List Without Having to Type Each Object's Name Twice
Check If a String Contains at Least One Numeric Character in R
Data.Table - Left Outer Join on Multiple Tables
Scales = "Free" Works for Facet_Wrap But Doesn't for Facet_Grid
"Could Not Find Function" in Roxygen Examples During Cmd Check