Dcast Without Id Variables

dcast without ID variables

Another option is unstack

# Spread the long `value` column into one column per level of `variable`.
# unstack() needs no id column; it returns a data frame when every level
# contributes the same number of rows.
out <- unstack(aql, form = value ~ variable)
head(out)
# ozone solar.r wind temp month day
#1 41 190 7.4 67 5 1
#2 36 118 8.0 72 5 2
#3 12 149 12.6 74 5 3
#4 18 313 11.5 62 5 4
#5 NA NA 14.3 56 5 5
#6 28 NA 14.9 66 5 6

As the question is about dcast, we can create a sequence column and then use dcast

# Number the observations within each level of `variable`; this per-group
# counter is the row identifier dcast needs on the left-hand side.
aql$indx <- ave(seq_along(aql$variable), aql$variable, FUN = seq_along)
# Cast to wide form, then drop the helper `indx` column (the first column).
out1 <- dcast(aql, indx ~ variable, value.var = "value")
out1 <- out1[, -1]
head(out1)
# ozone solar.r wind temp month day
#1 41 190 7.4 67 5 1
#2 36 118 8.0 72 5 2
#3 12 149 12.6 74 5 3
#4 18 313 11.5 62 5 4
#5 NA NA 14.3 56 5 5
#6 28 NA 14.9 66 5 6

If you are using data.table, the devel version of data.table, i.e. v1.9.5, also has a dcast function. Instructions to install the devel version are here

 library(data.table) # v1.9.5+
# Convert to a data.table by reference, then add a within-`variable` row
# counter that serves as the row identifier for the cast.
setDT(aql)
aql[, indx := seq_len(.N), by = variable]
# Cast to wide form and drop the helper counter (the first column).
dcast(aql, indx ~ variable, value.var = "value")[, -1]

Is it possible to use dcast without variable column?

With dcast, we can build the formula on the fly from an expression created with paste0 and rowid

library(data.table)
# rowid(id) numbers the rows within each id (1, 2, ...); pasting the "var_"
# prefix onto it builds the wide column names (var_1, var_2, ...) directly
# inside the formula. No value.var is supplied here, so dcast has to choose
# the value column itself.
dcast(dt, id ~ paste0('var_', rowid(id)))

-output

   id var_1 var_2
1: 1 100 300
2: 2 200 NA

How to specify ID variables in dcast?

We can specify the id.vars in melt; otherwise, melt picks the id variables automatically based on the column types.

library(reshape2)
# Melt to long form, keeping YEAR, SCHOOL_NAME and CONTENT_AREA as id columns,
# then cast back to one row per school with one column per
# CONTENT_AREA/variable pair.
# The melt argument is spelled out in full as `id.vars`; the shortened
# `id.var` only reached it through partial argument matching.
dcast(
  melt(datahave, id.vars = c("YEAR", "SCHOOL_NAME", "CONTENT_AREA")),
  SCHOOL_NAME ~ CONTENT_AREA + variable
)
# SCHOOL_NAME Eng. Language Arts_BELOW_BASIC_PCT Eng. Language Arts_BASIC_PCT
#1 5TH AND 6TH GRADE CTR. 38.1 28.3
#2 6TH GRADE CENTER 7.6 27.8
#3 7th and 8th Grade Center 52.1 27.4
# Eng. Language Arts_ADVANCED_PCT Mathematics_BELOW_BASIC_PCT Mathematics_BASIC_PCT Mathematics_ADVANCED_PCT
#1 10.1 39.0 30.3 14.6
#2 21.8 19.1 37.7 17.5
#3 1.7 NA NA NA
# Science_BELOW_BASIC_PCT Science_BASIC_PCT Science_ADVANCED_PCT
#1 25.4 41.7 12.3
#2 NA NA NA
#3 NA NA NA

recast, a wrapper that combines melt and dcast in a single call, can be used as well

# recast() melts and casts in one call: the id columns are named through
# `id.var`, and the formula describes the wide layout.
recast(
  datahave,
  SCHOOL_NAME ~ CONTENT_AREA + variable,
  id.var = c("YEAR", "SCHOOL_NAME", "CONTENT_AREA")
)

data

# Example input: one row per school and content area (six rows), with three
# percentage columns.
datahave <- structure(
  list(
    YEAR = rep(2015L, 6),
    SCHOOL_NAME = rep(
      c("5TH AND 6TH GRADE CTR.", "6TH GRADE CENTER", "7th and 8th Grade Center"),
      times = c(3, 2, 1)
    ),
    CONTENT_AREA = c(
      "Eng. Language Arts", "Mathematics", "Science",
      "Eng. Language Arts", "Mathematics",
      "Eng. Language Arts"
    ),
    BELOW_BASIC_PCT = c(38.1, 39, 25.4, 7.6, 19.1, 52.1),
    BASIC_PCT = c(28.3, 30.3, 41.7, 27.8, 37.7, 27.4),
    ADVANCED_PCT = c(10.1, 14.6, 12.3, 21.8, 17.5, 1.7)
  ),
  class = "data.frame",
  row.names = c("1", "2", "3", "4", "5", "6")
)

dcast data having id.vars variable with levels of unequal length

Here is the one-liner:

library(data.table)
setDT(df1) # convert the data frame to a data.table by reference
# Cast to one row per date_time and one column per sensor id; cells with no
# observation are filled with NA.
# `fun.aggregate` is spelled out in full; the shortened `fun` only reached it
# through partial argument matching.
# NOTE(review): an identity aggregator returns every value in a cell, so this
# presumably requires each date_time/id pair to occur at most once; confirm
# against the input data.
dcast(
  df1, date_time ~ id,
  value.var = "Temp",
  fun.aggregate = function(x) x,
  fill = NA_real_
)
# date_time Sens_1 Sens_2 Sens_3
# 1: 2012-02-27 00:11:00 23.0 34.6 14.6
# 2: 2012-02-27 00:11:30 23.4 33.4 14.4
# 3: 2012-02-27 00:12:00 34.6 NA 13.6
# 4: 2012-02-27 00:12:30 35.4 NA 15.4
# 5: 2012-02-27 00:13:00 25.5 36.5 15.5
# 6: 2012-02-27 00:13:30 26.3 38.8 16.8

Data:

# Example input in long form: 18 rows with a timestamp string (date_time),
# a sensor label (id) and a temperature reading (Temp).
# NOTE(review): rows 9-10 are labelled Sens_1 at 00:12:00 and 00:12:30, so
# Sens_1 has two readings at those times and Sens_2 has none; confirm this is
# intended.
df1 <- structure(list(date_time = c("2012-02-27 00:11:00", "2012-02-27 00:11:30", 
"2012-02-27 00:12:00", "2012-02-27 00:12:30", "2012-02-27 00:13:00",
"2012-02-27 00:13:30", "2012-02-27 00:11:00", "2012-02-27 00:11:30",
"2012-02-27 00:12:00", "2012-02-27 00:12:30", "2012-02-27 00:13:00",
"2012-02-27 00:13:30", "2012-02-27 00:11:00", "2012-02-27 00:11:30",
"2012-02-27 00:12:00", "2012-02-27 00:12:30", "2012-02-27 00:13:00",
"2012-02-27 00:13:30"),
id = c("Sens_1", "Sens_1", "Sens_1", "Sens_1", "Sens_1", "Sens_1", "Sens_2", "Sens_2",
"Sens_1", "Sens_1", "Sens_2", "Sens_2", "Sens_3", "Sens_3", "Sens_3", "Sens_3",
"Sens_3", "Sens_3"),
Temp = c(23, 23.4, 24.6, 25.2, 25.5, 26.3, 34.6, 33.4, 34.6, 35.4, 36.5, 38.8, 14.6,
14.4, 13.6, 15.4, 15.5, 16.8)), .Names = c("date_time", "id", "Temp"),
row.names = c(NA, -18L), class = "data.frame")

From long to wide form without id.var?

I'm pretty sure this has been answered before. Anyway, unstack is convenient in this particular case with equal group size:

# Split `value` into one column per level of `id`; no id column is needed.
unstack(dat1, form = value ~ id)
# A B
# 1 1 5
# 2 2 6
# 3 3 7
# 4 4 8

Can dcast be used without an aggregate function?

I don't think there is a way to do it directly, but we can add an additional column that will help us out

# Example input: seven rows where the id "C" / cat "SV" combination occurs
# twice.
df2 <- structure(
  list(
    id = c("A", "B", "C", "A", "B", "C", "C"),
    cat = c("SS", "SS", "SS", "SV", "SV", "SV", "SV"),
    val = c(220L, 222L, 223L, 224L, 225L, 220L, 1L)
  ),
  class = "data.frame",
  row.names = c(NA, -7L)
)

library(reshape2)
library(plyr)
# Add a variable for how many times the id*cat combination has occurred
tmp <- ddply(df2, .(id, cat), transform, newid = paste(id, seq_along(cat)))
# Aggregate using this newid and toss in the id so we don't lose it
out <- dcast(tmp, id + newid ~ cat, value.var = "val")
# Remove newid if we want. Selecting the columns to keep by name avoids
# `-which(...)`, which drops every column when nothing matches.
out <- out[, setdiff(colnames(out), "newid"), drop = FALSE]
# Print the result (the original line carried a console prompt, "> out",
# which is not valid R in a script).
out
# id SS SV
#1 A 220 224
#2 B 222 225
#3 C 223 220
#4 C NA 1

`dcast` with empty RHS

This is now possible using the rowid function:

# rowid(id) numbers the rows within each id, so it can stand in for the
# missing right-hand-side variable: the wide columns are named 1, 2, 3, ...
dcast(DT, id ~ rowid(id), value.var = "var")
# id 1 2 3
# 1: 6 1.1050942 0.1271620 1.3051373
# 2: 7 -0.5441056 -0.6866828 -0.8083762
# 3: 8 -0.6812820 -1.1934716 -1.3913903
# 4: 9 -0.3462497 -0.8229276 -1.0884394
# 5: 10 -0.4600681 0.6173795 -1.0125658

See ?rowid for more options, examples, and explanation.

dcast With multiple Ids and variables

A tidyverse solution, using gather and spread from the tidyr package:

library(dplyr)
library(tidyr) # version 1.0.0 which has pivot_wider

# Number the rows within each Type, stack Score and Time into key/value
# pairs, suffix each key with the row number, and spread back to wide form.
df1 %>%
  group_by(Type) %>%
  mutate(name_x = row_number()) %>%
  gather("var", "val", Score, Time) %>%
  mutate(var = paste(var, name_x, sep = "_")) %>%
  select(-name_x) %>%
  spread(key = var, value = val)

#> # A tibble: 3 x 11
#> # Groups: Type [3]
#> id Date Type Score_1 Score_2 Score_3 Score_4 Time_1 Time_2 Time_3 Time_4
#> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
#> 1 1 2001~ aaa 123 456 789 NA 12:12 13:12 14:12 <NA>
#> 2 2 2001~ ddd 113 145 NA NA 15:12 16:12 <NA> <NA>
#> 3 3 2001~ bbb 789 145 113 145 17:12 18:12 19:12 20:12

You can do the same with pivot_wider much more conveniently:

# Number the rows within each Type, then widen Score and Time in one step;
# the row number becomes the suffix of each new column name.
df1 %>%
  group_by(Type) %>%
  mutate(name_x = row_number()) %>%
  pivot_wider(
    id_cols = c(id, Date, Type),
    names_from = name_x,
    values_from = c(Score, Time)
  )

Data:

# Example input: nine rows across three id/Date/Type groups (sizes 3, 2
# and 4), each row with a numeric Score and a Time string.
# `stringsAsFactors` is set with FALSE rather than F, because F is an
# ordinary variable that can be reassigned.
df1 <- data.frame(
  id = c(1, 1, 1, 2, 2, 3, 3, 3, 3),
  Date = c(rep("2001-01-13", 3), rep("2001-01-16", 2), rep("2001-01-18", 4)),
  Type = c(rep("aaa", 3), rep("ddd", 2), rep("bbb", 4)),
  Score = c(123, 456, 789, 113, 145, 789, 145, 113, 145),
  Time = paste0(12:20, ":12"),
  stringsAsFactors = FALSE
)

Apply dcast multiple times for different variables

Here is an option with cSplit_e

library(splitstackshape)
# Expand V1 into indicator columns first, then expand V2 on that result;
# absent values are filled with "0".
cSplit_e(
  cSplit_e(mydf, "V1", type = "character", fill = "0"),
  "V2",
  type = "character", fill = "0"
)
# A V1 V2 V1_x V1_y V2_u V2_v V2_w
#1: A x u 1 0 1 0 0
#2: B x v 1 0 0 1 0
#3: C y w 0 1 0 0 1
#4: D x v 1 0 0 1 0
#5: E y u 0 1 1 0 0

Or with table from base R

 # For columns 2 and 3, cross-tabulate column A against that column, then
 # bind the resulting tables side by side.
 do.call(cbind, lapply(2:3, function(i) table(mydf$A, mydf[[i]])))

Or the same approach in data.table syntax

# Every column name except the first is a column to expand.
nm1 <- names(mydf)[-1]
# For each of those columns, cross-tabulate A against it and convert the
# table to a data frame.
out <- mydf[, lapply(.SD, function(x)
as.data.frame.matrix(table(A, x))), .SDcols = nm1]
# Add the indicator columns to mydf by reference; the trailing [] prints the
# updated table.
mydf[, names(out) := out][]
# A V1 V2 V1.x V1.y V2.u V2.v V2.w
#1: A x u 1 0 1 0 0
#2: B x v 1 0 0 1 0
#3: C y w 0 1 0 0 1
#4: D x v 1 0 0 1 0
#5: E y u 0 1 1 0 0

reshape2: dcast when there are multiple values for one cell but keep this values

This can be done with dcast (here from data.table) though you need a row identifier.

library(data.table)
# rowid(HLA_Status, variable) numbers the repeated rows within each
# HLA_Status/variable pair; adding it to the left-hand side gives every value
# its own row, so nothing has to be aggregated.
dcast(dt, HLA_Status + rowid(HLA_Status, variable) ~ variable)
# HLA_Status HLA_Status_1 CCL24 SPP1
#1: PC 1 5.698 2.698
#2: PC 2 89.457 9.457
#3: PC 3 78.230 8.230
#4: PP 1 9.645 23.120
#5: PP 2 56.320 36.320
#6: PP 3 7.268 17.268

data

# Example input read from literal text: a header line followed by 12 rows
# (two HLA_Status values x two variables x three measurements).
dt <- fread("    HLA_Status    variable      value
PP CCL24 9.645
PP CCL24 56.32
PP CCL24 7.268
PC CCL24 5.698
PC CCL24 89.457
PC CCL24 78.23
PP SPP1 23.12
PP SPP1 36.32
PP SPP1 17.268
PC SPP1 2.698
PC SPP1 9.457
PC SPP1 8.23")


Related Topics



Leave a reply



Submit