Count Number of Rows in a Data Frame in R Based on Group

count number of rows in a data frame in R based on group

Here's an example that shows how table(.) (or, more closely matching your desired output, data.frame(table(.))) does what it sounds like you are asking for.

Note also how to share reproducible sample data in a way that others can copy and paste into their session.

Here's the (reproducible) sample data:

# Reproducible sample data: five rows spread over three MONTH.YEAR groups.
# stringsAsFactors = FALSE keeps MONTH.YEAR a character column.
mydf <- data.frame(
  ID = c(110L, 111L, 121L, 131L, 141L),
  MONTH.YEAR = c(
    "JAN. 2012", "JAN. 2012",
    "FEB. 2012", "FEB. 2012",
    "MAR. 2012"
  ),
  VALUE = c(1000L, 2000L, 3000L, 4000L, 5000L),
  stringsAsFactors = FALSE
)

mydf
#    ID MONTH.YEAR VALUE
# 1 110  JAN. 2012  1000
# 2 111  JAN. 2012  2000
# 3 121  FEB. 2012  3000
# 4 131  FEB. 2012  4000
# 5 141  MAR. 2012  5000

Here's the calculation of the number of rows per group, in two output display formats:

# Named vector of counts: one entry per distinct MONTH.YEAR value.
table(mydf[["MONTH.YEAR"]])
# 
# FEB. 2012 JAN. 2012 MAR. 2012 
#         2         2         1

# The same counts in long form, one row per group (columns Var1 and Freq).
data.frame(table(mydf[["MONTH.YEAR"]]))
#        Var1 Freq
# 1 FEB. 2012    2
# 2 JAN. 2012    2
# 3 MAR. 2012    1

Count number of rows within each group

Current best practice (tidyverse) is:

# library() stops with an error if dplyr is not installed; require() would
# only return FALSE and let the next line fail with a less helpful message.
library(dplyr)

# One row per (Year, Month) combination, with the row count in column `n`.
df1 %>% count(Year, Month)

Count number of rows per group and add result to original data frame

Using data.table:

library(data.table)

# Convert `df` to a data.table and keep the result under a new name.
dt <- as.data.table(df)

# or coerce to data.table by reference:
# setDT(df)

# .N is the number of rows in the current (name, type) group; := stores it in
# a new column `count` without collapsing the rows.
dt[, count := .N, by = .(name, type)]

For an alternative that works with data.table versions before 1.8.2, see the edit history.

Using dplyr:

library(dplyr)

# n() is the size of the current (name, type) group; mutate() keeps every
# original row and repeats that size in the new `count` column.
df %>%
  group_by(name, type) %>%
  mutate(count = n()) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()

Or simply:

# The first two arguments after `df` are the grouping columns `name` and
# `type`. The named argument `name = "count"` sets the output column name,
# which would otherwise default to `n`.
add_count(df, name, type, name = "count")

Using plyr:

# The grouping columns are passed as a character vector: the `.()` quoting
# helper belongs to plyr and is not found when plyr is only referenced via
# `plyr::` without being attached.
# length(num) counts the rows in each group; `num` is presumably a column of
# `df` (any column would give the same count).
plyr::ddply(df, c("name", "type"), transform, count = length(num))

Add a column that counts the number of rows until the first 1, by group in R

# Example input: a Group id (four rows in group 1, two in group 2) followed
# by three 0/1 indicator columns.
df <- data.frame(
  Group = rep(c(1, 2), times = c(4, 2)),
  var1 = c(1, 0, 0, 1, 1, 1),
  var2 = c(0, 0, 1, 1, 0, 0),
  var3 = c(0, 1, 0, 0, 0, 1)
)

This works for any number of variables as long as the structure is the same as in the example (i.e., Group + many variables that are 0 or 1).

library(dplyr)
library(tidyr)

df %>%
  # Tag each row so pivot_wider() can rebuild the original rows afterwards.
  mutate(rownr = row_number()) %>%
  # One row per (original row, variable); the variable name lands in `name`.
  pivot_longer(-c(Group, rownr)) %>%
  group_by(Group, name) %>%
  # First assignment: running count of the leading non-1 values, plus one, so
  # max(out) is the position of the first 1 within the group.
  # Second assignment: collapse to that position, or to 0 when the group
  # contains no 1 at all (max(out) is then n() + 1).
  mutate(out = cumsum(value != 1 & (cumsum(value) < 1)) + 1,
         out = ifelse(max(out) > n(), 0, max(out))) %>%
  # Back to one row per original row, with value_* and out_* column pairs.
  pivot_wider(names_from = name, values_from = c(value, out)) %>%
  select(-rownr)

Returns:

  Group value_var1 value_var2 value_var3 out_var1 out_var2 out_var3
  <dbl>      <dbl>      <dbl>      <dbl>    <dbl>    <dbl>    <dbl>
1     1          1          0          0        1        3        2
2     1          0          0          1        1        3        2
3     1          0          1          0        1        3        2
4     1          1          1          0        1        3        2
5     2          1          0          0        1        0        2
6     2          1          0          1        1        0        2

Count rows in data table with certain values by group

You can solve it as follows:

# Names of the two count columns to add; `df` is assumed to already be a
# data.table (the := syntax below requires it).
cols <- c("number_of_offices", "number_of_apartments")

# For every Property, count the rows of each Type and store both counts on
# all rows of that Property.
df[
  ,
  (cols) := list(sum(Type == "office"), sum(Type == "apartment")),
  by = Property
]

# Property      Type number_of_offices number_of_apartments
# 1:        1 apartment                 1                    1
# 2:        1    office                 1                    1
# 3:        2    office                 2                    0
# 4:        2    office                 2                    0
# 5:        3 apartment                 1                    2
# 6:        3 apartment                 1                    2
# 7:        3    office                 1                    2

Calculate frequency for group-rows within data frame

The problem here is that although df is technically a data frame, it is not well structured. A data frame should have one column per variable and one row per observation. Your data would make more sense if it were transposed first:

library(tibble)
library(dplyr)

# Transpose so that every sample becomes a row, then move the former row
# names into an explicit `sample` column.
df <- df %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample")

# Convert columns 2 to 5 (the species counts) to numeric.
df <- mutate(df, across(2:5, as.numeric))

df

#>    sample Species1 Species2 Species3 Species4 Group
#> 1 sample1        2        0        5        0   Gr1
#> 2 sample2       12       13        0        0   Gr1
#> 3 sample3       52        0        0        0   Gr2
#> 4 sample4      221        0       25        0   Gr2

Now we can pivot to make the Species its own column, and it is straightforward to do the calculations you need:

# Stack columns 2 to 5 into name/value pairs, then summarise per species
# (`name`) and Group.
tidyr::pivot_longer(df, 2:5) %>%
  group_by(name, Group) %>%
  # absolute: number of samples in which the species is present (value > 0).
  # Freq: that number as a share of the samples in the group.
  # .groups = "drop_last" keeps the result grouped by `name`, which is the
  # default behaviour, but states it explicitly and avoids the regrouping
  # message.
  summarise(absolute = sum(value > 0),
            Freq = absolute / n(),
            .groups = "drop_last")

#> # A tibble: 8 x 4
#> # Groups:   name [4]
#>   name     Group absolute  Freq
#>   <chr>    <chr>    <int> <dbl>
#> 1 Species1 Gr1          2   1  
#> 2 Species1 Gr2          2   1  
#> 3 Species2 Gr1          1   0.5
#> 4 Species2 Gr2          0   0  
#> 5 Species3 Gr1          1   0.5
#> 6 Species3 Gr2          1   0.5
#> 7 Species4 Gr1          0   0  
#> 8 Species4 Gr2          0   0

Count rows with the same pattern in R

dplyr

library(dplyr)
# Collapse identical (start_id, end_id, type) combinations into one row each;
# `n` holds how many rows shared that combination. The result is ungrouped.
dat %>%
  group_by(start_id, end_id, type) %>%
  summarise(n = n(), .groups = "drop")
# # A tibble: 3 x 4
#   start_id end_id type      n
#      <dbl>  <dbl> <chr> <int>
# 1        1      2 a         1
# 2        1      3 b         2
# 3        2      5 a         2

base R

# Count rows per (start_id, end_id, type) by taking the length of the
# remaining column `id` within each combination.
aggregate(
  id ~ start_id + end_id + type,
  data = dat,
  FUN = length
)
#   start_id end_id type id
# 1        1      2    a  1
# 2        2      5    a  2
# 3        1      3    b  2

Data

# Sample data: five rows, three distinct (start_id, end_id, type) patterns.
dat <- data.frame(
  start_id = c(1, 2, 1, 2, 1),
  end_id = c(2, 5, 3, 5, 3),
  type = c("a", "a", "b", "a", "b"),
  id = 1:5,
  stringsAsFactors = FALSE
)

Count Number of Rows in a Data Frame in R Based on Group