Can Dcast Be Used Without an Aggregate Function

Can dcast be used without an aggregate function?

I don't think there is a way to do it directly, but we can add an extra column that numbers the repeats of each id/cat combination, so that dcast has nothing left to aggregate.

# Example data: the id/cat pair C/SV appears twice, so id ~ cat alone does not
# identify a single value per cell.
df2 <- data.frame(
  id = c("A", "B", "C", "A", "B", "C", "C"),
  cat = c("SS", "SS", "SS", "SV", "SV", "SV", "SV"),
  val = c(220L, 222L, 223L, 224L, 225L, 220L, 1L),
  stringsAsFactors = FALSE
)

library(reshape2)
library(plyr)
# Number each occurrence of an id*cat combination so that every
# (id, newid, cat) cell holds exactly one value and dcast() has nothing to
# aggregate.
tmp <- ddply(df2, .(id, cat), transform, newid = paste(id, seq_along(cat)))
# Cast on id + newid; keeping id in the formula preserves it in the result.
out <- dcast(tmp, id + newid ~ cat, value.var = "val")
# Drop the helper column by name. Negative indexing via -which(...) would drop
# every column if "newid" were ever absent, because -integer(0) selects
# nothing.
out <- out[, setdiff(colnames(out), "newid"), drop = FALSE]
> out
#  id  SS  SV
#1  A 220 224
#2  B 222 225
#3  C 223 220
#4  C  NA   1

R: Casting a character column without aggregation

We can create a sequence column for the duplicates and it should work

library(data.table)
# rowid(Rank) numbers the repeats within each Rank, giving duplicates a
# distinct key; setDT() converts Rank2 to a data.table by reference.
dcast(
  setDT(Rank2),
  Cat + rowid(Rank) ~ Rank,
  value.var = "Group"
)

why is dcast so impossible to pass a non-aggregate function?

We can use rbindlist after placing the datasets in a list

# Stack both tables row-wise; rbindlist() takes its inputs as a single list.
DTfinal <- rbindlist(l = list(a, b))
# The result has the rows of both inputs and the same number of columns.
dim(DTfinal)
#[1] 400   4
dim(a)
#[1] 200   4
dim(b)
#[1] 200   4

Suppose the two datasets have different numbers of columns and we have a vector of the column names that we need to keep:

# Restrict both tables to the columns they share, then stack them; idcol adds
# an "id" column recording which input each row came from.
nm1 <- intersect(names(a), names(b))
common_cols <- lapply(list(a, b), function(dt) dt[, nm1, with = FALSE])
rbindlist(common_cols, idcol = "id")

Update

If we need to convert to 'wide' format

# Name the list elements so that idcol labels each row "a" or "b".
DTfinal <- rbindlist(list(a = a, b = b), idcol = "id")
# Spread both value columns by id, giving one mean_* and one median_* column
# per source table.
dcast(DTfinal, grp + time ~ id, value.var = c("mean", "median"))
#     grp time       mean_a    mean_b    median_a median_b
#  1:   a    1   0.52171471  25.99502 -0.06558068       25
#  2:   a    2   0.36445108  25.99010  0.13518412       25
#  3:   a    3   0.08993721  25.98522  0.20128790       25
#  4:   a    4 -64.04617391  25.98039  0.40999376       25
#  5:   a    5   0.81730847  25.97561 -0.03481697       25
# ---                                                     
#196:   d   46   1.62818374 176.67568 -0.26695999      176
#197:   d   47  -1.45259871 176.67340  0.14893356      176
#198:   d   48   9.59796683 176.67114 -0.05834959      176
#199:   d   49  -2.74285453 176.66890 -0.22094347      176
#200:   d   50   1.22109043 176.66667 -0.08172928      176

reshape2 dcast without aggregation - problems with seq

You may need to create a sequence column

# Running count of rows within each species, added as an extra key column so
# that dcast() does not have to aggregate.
mymelt$indx <- ave(
  seq_along(mymelt$species),
  mymelt$species,
  FUN = seq_along
)
dcast(
  mymelt,
  sample + variable + indx ~ species,
  value.var = "value",
  fill = 0
)
#        sample variable indx  M. edulis Mytilus sp.
#1  41411elba_2  cell_nr    2    5107.51        0.00
#2  41411elba_2   biovol    4 1021502.16        0.00
#3   41442bay_1  cell_nr    1       0.00        6.22
#4   41442bay_1   biovol    4       0.00     1243.04
#5   41443bay_2  cell_nr    1     599.14        0.00
#6   41443bay_2   biovol    3  114028.15        0.00
#7   41502bay_3  cell_nr    3       0.00        2.74
#8   41502bay_3   biovol    6       0.00      548.21
#9  41502elba_1  cell_nr    2       0.00        1.35
#10 41502elba_1   biovol    5       0.00      260.64

EDIT

If the dataset still has duplicates, try

 # Restart the counter for every species/sample pair rather than per species.
 mymelt$indx <- with(
   mymelt,
   ave(seq_along(species), species, sample, FUN = seq_along)
 )
 dcast(
   mymelt,
   sample + variable + indx ~ species,
   value.var = "value",
   fill = 0
 )

data

 # Example input: one row per sample with its species and two measurements.
 mydata <- data.frame(
   sample = c(
     "41442bay_1", "41502elba_1", "41502bay_3", "41443bay_2", "41411elba_2"
   ),
   species = c(
     "Mytilus sp.", "Mytilus sp.", "Mytilus sp.", "M. edulis", "M. edulis"
   ),
   cell_nr = c(6.22, 1.35, 2.74, 599.14, 5107.51),
   biovol = c(1243.04, 260.64, 548.21, 114028.15, 1021502.16),
   row.names = c("1", "2", "3", "4", "5"),
   stringsAsFactors = FALSE
 )

Customise the aggregate function inside dcast based on the max value of a column in data.table?

Here is one option where you take the relevant subset before dcasting:

# Sort by V4 descending so the first row of each V1/V2 pair carries the
# largest V4, keep only that row, then cast without aggregation.
top_rows <- unique(DT[order(V4, decreasing = TRUE)], by = c("V1", "V2"))
dcast(top_rows, V1 ~ V2, value.var = "V3")

#    V1  1    2
# 1:  a  B   cc
# 2:  b st   st
# 3:  c cc <NA>

Alternatively order and use a custom function in dcast():

# Same selection done inside dcast(): with rows sorted by V4 descending, the
# aggregate keeps the first V3 value it sees for each V1/V2 cell.
dt_by_v4 <- DT[order(V4, decreasing = TRUE)]
dcast(
  dt_by_v4,
  V1 ~ V2,
  value.var = "V3",
  fun.aggregate = function(vals) vals[1L]
)

Can Dcast Be Used Without an Aggregate Function