Count Non-Na Values by Group

Count non-NA values by group

You can use this

# Count the non-missing entries of col_2 within each level of col_1.
mydf %>%
  group_by(col_1) %>%
  # !is.na() gives a logical vector; summing it counts the non-NA entries.
  summarise(non_na_count = sum(!is.na(col_2)))

# A tibble: 2 x 2
   col_1 non_na_count
  <fctr>        <int>
1      A            1
2      B            2

Count non-`NA` of several columns by group using summarize and across from dplyr

I hope this is what you are looking for:

library(dplyr)

# For every ID, count the non-missing entries of each column from Col1 to Col3.
d %>%
  group_by(ID) %>%
  summarise(
    across(
      Col1:Col3,
      function(col) sum(!is.na(col)),
      # Prefix each output column with "non-", e.g. Col1 becomes `non-Col1`.
      .names = "non-{.col}"
    )
  )

# A tibble: 3 x 4
     ID `non-Col1` `non-Col2` `non-Col3`
  <dbl>      <int>      <int>      <int>
1     1          3          2          3
2     2          2          0          2
3     3          1          1          0

Or if you would like to select columns by their shared string you can use this:

# Same per-ID count, but the columns are picked by a shared substring of
# their names instead of by position.
d %>%
  group_by(ID) %>%
  summarise(
    across(
      contains("Col"),
      function(col) sum(!is.na(col)),
      .names = "non-{.col}"
    )
  )

Taking a count() after group_by() for non-missing values

count is not the right function here. Its first argument must be a data frame or tibble, but you are passing a vector, hence the error. Also, count summarises the data frame so that you get only one row per group. See, for example:

library(dplyr)

# Demonstration only: count() collapses the data to one row per country, so
# the mean_x column added by mutate() is not part of the result.
df %>% 
  group_by(country) %>% 
  mutate(mean_x = mean(x, na.rm = TRUE)) %>%
  count(country)

#  country     n
#  <fct>   <int>
#1 JPN         2
#2 USA         2

If you want to add a new column without summarising, use add_count instead

# add_count() keeps every row and appends the group size as column n.
# n is the number of rows per country, so rows whose x is NA are included.
df %>% 
  group_by(country) %>% 
  mutate(mean_x = mean(x, na.rm = TRUE)) %>%
  add_count(country)

#     id     x country mean_x     n
#  <dbl> <dbl> <fct>    <dbl> <int>
#1     1   2   USA        3       2
#2     2   4   USA        3       2
#3     3   3.5 JPN        3.5     2
#4     4  NA   JPN        3.5     2

However, neither of these functions does what you need. To count the non-NA values per group, you need

# Keep every row and attach, per country, the mean of x and the number of
# non-missing x values.
df %>%
  group_by(country) %>%
  mutate(
    mean_x = mean(x, na.rm = TRUE),
    # Summing the logical vector counts the non-NA entries
    # (variant mentioned by @Humpelstielzchen).
    count = sum(!is.na(x))
    # Equivalent alternative:
    # count = length(na.omit(x))
  )


#    id     x country mean_x count
#  <dbl> <dbl> <fct>    <dbl> <int>
#1     1   2   USA        3       2
#2     2   4   USA        3       2
#3     3   3.5 JPN        3.5     1
#4     4  NA   JPN        3.5     1

From an R dataframe: count non-NA values by column, grouped by one of the columns

We can use summarise_all

library(dplyr)
# Count the non-missing entries of every non-grouping column per group.
# A formula lambda is used instead of funs(), which has been deprecated
# since dplyr 0.8.0.
litmus %>%
  group_by(grouping) %>%
  summarise_all(~ sum(!is.na(.)))

How to get count of all non NA values for all variables by group?

using dplyr >= 1.0.0:

# Per size group, count the non-missing entries of every other column.
df %>%
  group_by(size) %>%
  summarise(across(everything(), function(col) sum(!is.na(col))))
# A tibble: 2 x 6
  size     P1    P2    P4    P3    P5
* <chr> <int> <int> <int> <int> <int>
1 10K       5     5     2     2     1
2 5K        5     5     0     0     0

To get the exact output as OP's:

# Per-size non-NA counts, reordered to match the OP's row order.
# across() is used here because this section targets dplyr >= 1.0.0, where
# summarise_all() is superseded.
df %>%
  group_by(size) %>%
  summarise(across(everything(), ~ sum(!is.na(.x)))) %>%
  # size is a character column, so it sorts lexicographically: "5K" > "10K".
  # That is why descending order puts 5K first.
  arrange(desc(size))
# A tibble: 2 x 6
  size     P1    P2    P4    P3    P5
  <chr> <int> <int> <int> <int> <int>
1 5K        5     5     0     0     0
2 10K       5     5     2     2     1

For dplyr versions before 1.0.0 (which lack across()):

 # Legacy spelling: summarise_all() applies the function to every
 # non-grouping column.
 df %>%
   group_by(size) %>%
   summarise_all(.funs = function(col) sum(!is.na(col)))
# A tibble: 2 x 6
  size     P1    P2    P4    P3    P5
* <chr> <int> <int> <int> <int> <int>
1 10K       5     5     2     2     1
2 5K        5     5     0     0     0

Data

# Example data: ten rows, five per size class. P4, P3 and P5 are mostly
# missing and only have values in the "10K" rows.
df <- data.frame(
  size = rep(c("5K", "10K"), each = 5L),
  P1 = c(3L, 3L, 2L, 4L, 3L, 4L, 5L, 4L, 3L, 4L),
  P2 = c(3L, 3L, 2L, 3L, 3L, 4L, 4L, 3L, 3L, 3L),
  P4 = c(rep(NA_integer_, 7L), 4L, 3L, NA_integer_),
  P3 = c(rep(NA_integer_, 7L), 4L, 3L, NA_integer_),
  P5 = c(rep(NA_integer_, 8L), 1L, NA_integer_),
  stringsAsFactors = FALSE  # keep size as character on R < 4.0 as well
)

Count number of non-NA values by group

Or if you wanted to use data.table:

library(data.table)

# Number of non-missing X2 values per Color; the unnamed result column is
# reported as V1.
dt[, sum(!is.na(X2)), by = Color]

  Color V1
1:   Red  2
2:  Blue  0
3: Green  1

It is also easy to use ifelse() inside the data.table call to get NA for Blue instead of 0:

# Report NA instead of 0 for colours that have no non-missing X2 values.
# The count is computed first and only then compared with 0. Writing
# sum(!is.na(X2) == 0) would parse as sum(!(is.na(X2) == 0)) because `==`
# binds tighter than `!`. That counts the NAs instead, so any group
# containing at least one NA would be reported as NA.
dt[, {
  n_present <- sum(!is.na(X2))
  ifelse(n_present == 0L, NA_integer_, n_present)
}, by = .(Color)]

   Color V1
1:   Red  2
2:  Blue NA
3: Green  1

Data:

 # Build the example table from inline text. fread() already returns a
 # data.table, so no extra as.data.table() conversion is needed.
 dt <- fread("Color    X1      X2    X3    X4
Red      1       1     0     2
Blue     0       NA    4     1 
Red      3       4     3     1
Green    2       2     1     0")

R group by, counting non-NA values

We can use data.table. Convert the 'data.frame' to 'data.table' (setDT(toy_df)), grouped by 'Label', loop through the Subset of Data.table (.SD) and get the sum of non-NA values (!is.na(x))

library(data.table)
# setDT() converts toy_df to a data.table by reference, so toy_df itself is
# a data.table from here on.
setDT(toy_df)
# Within each Label, count the non-NA entries of every remaining column (.SD).
toy_df[, lapply(.SD, function(col) sum(!is.na(col))), by = Label]
#   Label Y X1 X2
#1:     A 1  1  1
#2:     B 2  0  2
#3:     C 1  0  0

Or with dplyr using the same methodology

library(dplyr)
# Per Label, count the non-missing entries of every other column.
# across() replaces summarise_each() and funs(), both of which are deprecated.
toy_df %>%
  group_by(Label) %>%
  summarise(across(everything(), ~ sum(!is.na(.x))))

Or a base R option with by and colSums grouped by the 4th column on logical matrix (!is.na(toy_df[-4]))

# Split the logical "is present" matrix by the 4th column and total each
# column within every group.
by(data = !is.na(toy_df[-4]), INDICES = toy_df[4], FUN = colSums)

Or with rowsum with similar approach as in by except using the rowsum function.

# Unary + turns the logical matrix into integers, which rowsum() then adds up
# within each level of the 4th column.
rowsum(x = +(!is.na(toy_df[-4])), group = toy_df[, 4])
#  Y X1 X2
#A 1  1  1
#B 2  0  2
#C 1  0  0

Count maximum consecutive repeated non-NA values grouped by another variable in dataframe R

Using data.table rleid to keep track of consecutive values you can do -

library(dplyr)
library(data.table)

# Longest run of identical consecutive Valor values per ADM2_PCODE.
# NOTE(review): NA rows are dropped before the runs are identified, so equal
# values that were separated only by NAs end up adjacent and are counted as
# one run. Confirm this is the intended meaning of "consecutive".
df %>%
  filter(!is.na(Valor)) %>%
  group_by(ADM2_PCODE) %>%
  # rleid() gives each run of identical consecutive values its own id.
  mutate(grp = rleid(Valor)) %>%
  # n = length of each run within the ADM2_PCODE group.
  count(grp) %>%
  summarise(max_consecutive_values = max(n))

#  ADM2_PCODE max_consecutive_values
#       <dbl>                  <int>
#1    1100015                      2
#2    1100016                      3
#3    1100017                      1

How to add columns to my data frame, including the counts of another columns , for two different column, in R?

Here is a tidyverse approach. You can group_by your ID column and count the rows that are not NA.

library(tidyverse)

# One row per (ID, l) pair with the number of non-missing x and y values.
df %>%
  group_by(ID, l) %>%
  summarise(
    n.x = sum(!is.na(x)),
    n.y = sum(!is.na(y)),
    .groups = "drop"  # return an ungrouped tibble
  )

# A tibble: 4 x 4
     ID l       n.x   n.y
  <int> <chr> <int> <int>
1     1 s         5     4
2     2 ss        3     2
3     3 m         7     3
4     4 mm        2     2

How to count total non-na values using dplyr?

If it is the count of non-NA, create a logical vector (!is.na(value)) and get the sum

library(dplyr)
# Replace value with the number of its non-missing entries per error_code.
df %>%
  group_by(error_code) %>%
  summarise(value = sum(!is.na(value)))

If the column name(s) are stored in an object, use across, which is more general and also works for multiple columns

# categoryColumn is an object holding the column name(s) to summarise
# (presumably a character vector). Wrapping it in all_of() marks it as an
# external vector of names: a bare variable inside a selection is ambiguous
# and deprecated. all_of() also raises an error if a listed column is missing.
df %>%
  group_by(error_code) %>%
  summarise(across(all_of(categoryColumn), ~ sum(!is.na(.))))

An expression such as sum(is.na(.)) placed outside across would be computed over all the elements of the data set, because there the . refers to the whole data frame coming through the pipe rather than to an individual column.

Count Non-Na Values by Group