Empty Factors in "By" Data.Table

Empty factors in by data.table

library(data.table)
# Reproducible example data: v2 is a factor whose levels "d" and "e" never
# occur in the sampled values, so they are empty levels.
set.seed(42)
dtr <- data.table(
  v1 = sample(1:15),
  v2 = factor(sample(letters[1:3], 15, replace = TRUE), levels = letters[1:5]),
  v3 = sample(c("yes", "no"), 15, replace = TRUE)
)

# Aggregate by both columns; grouping only returns the combinations that are
# actually present in the data.
res <- dtr[, .(freq = .N, mm = sum(v1, na.rm = TRUE)), by = .(v2, v3)]

You can use CJ (a cross join). Doing this after aggregation avoids setting the key for the big table and should be faster.

# Key the aggregated table on both grouping columns. setkeyv() is the variant
# that accepts the column names as a character vector; setkey() expects
# unquoted column names instead.
setkeyv(res, c("v2", "v3"))
# Join against every (level of v2) x (value of v3) combination; combinations
# without rows in res come back with NA in freq and mm.
res[CJ(levels(dtr$v2), unique(dtr$v3))]

#    v2  v3 freq mm
# 1:  a  no    1  9
# 2:  a yes    2 11
# 3:  b  no    2 11
# 4:  b yes    3 23
# 5:  c  no    4 40
# 6:  c yes    3 26
# 7:  d  no   NA NA
# 8:  d yes   NA NA
# 9:  e  no   NA NA
# 10:  e yes   NA NA

Use a factor column in by and do not drop empty factors

If you are willing to enumerate the factor levels in i (rather than grouping with by="group"), this will give you the hoped-for result.

# Key x on the factor column, then look every level up through i so that a
# level without rows still yields a result row (N = 0).
setkeyv(x, "group")
x[levels(group), .N, by = .EACHI]
#    group N
# 1:     a 2
# 2:     b 1
# 3:     c 0

aggregate with empty factor but keep row

# Create the example dataset: val cycles through 1, 2, 3 four times and
# `factor` bins it into [1,2], (2,3], (3,4] labelled A, B, C, so level C has
# no rows.
# The original call also passed `ordered_results=True`. That is not an
# argument of cut() (the formal is `ordered_result`) and `True` is not an R
# constant; the printed results show an ordinary unordered factor, so the
# argument is dropped rather than corrected.
df2 <- data.frame(
  val = rep(1:3, 4),
  factor = cut(
    rep(1:3, 4),
    breaks = c(1, 2, 3, 4),
    include.lowest = TRUE,
    labels = LETTERS[1:3]
  )
)

library(dplyr)

# Start from one row per factor level, attach the per-level sum of val
# (computed only for levels that occur in df2), then turn the NA sums of the
# empty levels into 0.
left_join(
  data.frame(factor = levels(df2$factor)),
  summarise(group_by(df2, factor), x = sum(val)),
  by = "factor"
) %>%
  mutate(x = coalesce(x, 0L))

#   factor  x
# 1      A 12
# 2      B 12
# 3      C  0

Or without any package

# Left-join the per-level sums onto the full set of factor levels so that
# levels with no rows are kept (their sum comes back as NA).
# aggregate() with an unnamed grouping list names its columns Group.1 and x.
dd <- merge(
  data.frame(Group.1 = levels(df2$factor)),
  aggregate(df2$val, list(df2$factor), sum),
  by = "Group.1",
  all.x = TRUE
)
# Replace the NA sums of the empty levels with 0.
dd$x <- ifelse(is.na(dd$x), 0, dd$x)
dd

#   Group.1  x
# 1       A 12
# 2       B 12
# 3       C  0

Or use the data.table package, which you can benchmark to check whether it is faster

library(data.table)

# assuming you start with a data frame: val cycles through 1, 2, 3 four times
# and `factor` bins it into [1,2], (2,3], (3,4] labelled A, B, C, so level C
# has no rows.
# The original call also passed `ordered_results=True`. That is not an
# argument of cut() (the formal is `ordered_result`) and `True` is not an R
# constant; the printed results show an ordinary unordered factor, so the
# argument is dropped rather than corrected.
df2 <- data.frame(
  val = rep(1:3, 4),
  factor = cut(
    rep(1:3, 4),
    breaks = c(1, 2, 3, 4),
    include.lowest = TRUE,
    labels = LETTERS[1:3]
  )
)

# Lookup table with one row per level of the factor, keyed on "factor".
dt_levels <- data.table(factor = levels(df2$factor), key = "factor")

# Convert df2 to a data.table by reference and key it on "factor", then total
# val within each level that actually occurs.
setDT(df2, key = "factor")
dt_sum <- df2[, .(Sum = sum(val)), by = "factor"]

# Join the totals onto the full list of levels; levels without rows come back
# as NA and are then set to 0 by reference.
dt_result <- dt_sum[dt_levels]
dt_result[, Sum := ifelse(is.na(Sum), 0, Sum)]

dt_result[]

#    factor Sum
# 1:      A  12
# 2:      B  12
# 3:      C   0

Complete with all combinations after counting on data.table

Here is one possible way to solve your problem. Note that the argument with=FALSE in the data.table context lets you select columns using the standard data.frame rules. In the example below, I assume that the columns used to compute all combinations are passed to myfun as a character vector.
Keep in mind that no column in your dataset should be named gcases. Using .EACHI in by performs the operation once for each row in i.

#' Count rows for every combination of grouping values, including empty ones
#'
#' @param d A data.frame or data.table. It is not modified: the function
#'   works on a copy.
#' @param g Character vector naming the columns of `d` to combine.
#' @return A data.table with one row per combination of the grouping values
#'   (all levels for factor columns, the observed unique values otherwise)
#'   and a count column `N`; combinations that do not occur in `d` get 0.
myfun <- function(d, g) {
  stopifnot(is.character(g), all(g %in% names(d)))

  # setDT(d, key = g) on the argument itself would convert and re-sort the
  # caller's object by reference, so key a copy instead.
  dt <- setDT(copy(d), key = g)

  # get levels (for factors) and unique values for other types.
  fn <- function(x) if (is.factor(x)) levels(x) else unique(x)
  gcases <- lapply(dt[, g, with = FALSE], fn)

  # count based on all combinations; .EACHI evaluates .N once per row of i
  dt[do.call(CJ, gcases), .N, keyby = .EACHI]
}

Ordering factors in data table using DT package

You could use a hidden column with the numeric value of the factor and sort the factors according to that hidden column:

library('DT')
# Binned values as a factor: the order of the levels, not the label text,
# is the order the table should sort by.
value <- factor(
  c(7.5, 12.5, 7.5, 17.5),
  levels = c(7.5, 12.5, 17.5),
  labels = c("5-10%", "10-15%", "15-20%")
)

# The third column stores the numeric level codes; it exists only as a
# sort key for the value column.
example <- data.frame(
  name = LETTERS[1:4],
  value = value,
  levels = as.numeric(value)
)

# With rownames = FALSE the column targets count from 0 here: column 1
# (value) is ordered by the data of column 2 (levels), and column 2 is hidden.
datatable(
  example,
  rownames = FALSE,
  options = list(
    columnDefs = list(
      list(orderData = 2, targets = 1),
      list(visible = FALSE, targets = 2)
    )
  )
)

Empty Factors in "By" Data.Table