Reshaping a Data Frame with More Than One Measure Variable

Reshaping a data frame with more than one measure variable

Here's how you could do this with reshape(), from base R:

# Stack the two quizzes into long format. Columns 1-2 identify a row,
# columns 3 and 5 are collected into "p1" and columns 4 and 6 into "p2";
# the new `time` column records which quiz each row came from.
df2 <- reshape(
  df,
  direction = "long",
  idvar = 1:2,
  varying = list(c(3, 5), c(4, 6)),
  v.names = c("p1", "p2"),
  times = c("quiz1", "quiz2")
)

## Checking the output: first and last three rows
rbind(head(df2, n = 3), tail(df2, n = 3))
#           student month  time   p1   p2
# 1.1.quiz1       1     1 quiz1 20.0 30.0
# 1.2.quiz1       1     2 quiz1 20.1 30.1
# 1.3.quiz1       1     3 quiz1 20.2 30.2
# 2.3.quiz2       2     3 quiz2 80.7 90.7
# 2.4.quiz2       2     4 quiz2 80.8 90.8
# 2.5.quiz2       2     5 quiz2 80.9 90.9

You can also use column names (instead of column numbers) for idvar and varying. It's more verbose, but seems like better practice to me:

## The same operation as above, with every column referred to by *name*
## rather than by position.
df2 <- reshape(
  df,
  direction = "long",
  idvar = c("student", "month"),
  varying = list(
    c("quiz1p1", "quiz2p1"),
    c("quiz1p2", "quiz2p2")
  ),
  v.names = c("p1", "p2"),
  times = c("quiz1", "quiz2")
)

Reshape a dataframe to long format with multiple sets of measure columns

If the 'year' and 'pop' columns alternate, we can subset with c(TRUE, FALSE) to get columns 1, 3, 5, etc., and with c(FALSE, TRUE) to get columns 2, 4, 6, etc., thanks to recycling. Then we unlist the columns and create a new 'data.frame'.

 # The logical index is recycled across the columns of df1:
 # c(TRUE, FALSE) keeps columns 1, 3, 5, ... and c(FALSE, TRUE) keeps 2, 4, 6, ...
 # unlist() then concatenates the kept columns into one long vector each.
 df2 <- data.frame(
   year = unlist(df1[c(TRUE, FALSE)]),
   pop = unlist(df1[c(FALSE, TRUE)])
 )
 # Drop the row names that unlist() derived from the column names.
 rownames(df2) <- NULL
 head(df2)
 #   year    pop
 #1            
 #2 16XX 4675,0
 #3 17XX 4739,3
 #4 17XX 4834,0
 #5 180X 4930,0
 #6 180X 5029,0

Or another option is

library(splitstackshape)
# Add a row id, then stack all "year*" columns and all "pop*" columns into
# single columns; the result is ordered by `.time_1` and only columns 3 and 4
# are kept.
# seq_len() replaces 1:nrow(df1), which would produce c(1, 0) for a
# zero-row df1.
# NOTE(review): sep = "var.stubs" is presumably the special value that
# merged.stack() accepts when names have no separator -- confirm against
# the splitstackshape documentation.
merged.stack(transform(df1, id = seq_len(nrow(df1))),
             var.stubs = c("year", "pop"),
             sep = "var.stubs")[order(.time_1), 3:4, with = FALSE]

data

# Example wide-format input: three (year, pop) column pairs with ten rows.
# year1 is character while year2 and year3 are integer; the pop columns are
# character strings that use a decimal comma. The first row is empty ("" or
# NA) in every column, and pop3 has only one non-empty value.
df1 <- structure(list(year1 = c("", "16XX", "17XX", "17XX", "180X", 
"180X", "181X", "181X", "182X", "182X"), pop1 = c("", "4675,0", 
"4739,3", "4834,0", "4930,0", "5029,0", "5129,0", "5231,9", "5297,0", 
"5362,0"), year2 = c(NA, 1900L, 1901L, 1902L, 1903L, 1904L, 1905L, 
1906L, 1907L, 1908L), pop2 = c("", "6453,0", "6553,5", "6684,0", 
"6818,0", "6955,0", "7094,0", "7234,7", "7329,0", "7422,0"), 
year3 = c(NA, 1930L, 1931L, 1932L, 1933L, 1934L, 1935L, 1936L, 
1937L, 1938L), pop3 = c("", "9981,2", "", "", "", "", "", 
"", "", "")), .Names = c("year1", "pop1", "year2", "pop2", 
"year3", "pop3"), class = "data.frame", row.names = c(NA, -10L))

Data frame from wide to long with multiple variables and ids R

Answer already exists here: https://stackoverflow.com/a/12466668/2371031

e.g.,

set.seed(123)

# Example wide data: one row per participant and three measures
# (judgment, correct, text_id), each recorded twice (suffix _1 / _2).
# The columns are listed in the same order as before so the random
# number stream is consumed identically.
wide_df <- data.frame(
  participant_id = LETTERS[1:12],
  judgment_1 = round(rnorm(12) * 100),
  correct_1 = round(rnorm(12) * 100),
  text_id_1 = sample(1:12, 12, replace = FALSE),
  judgment_2 = round(rnorm(12) * 100),
  correct_2 = round(rnorm(12) * 100),
  text_id_2 = sample(13:24, 12, replace = FALSE)
)

# Stack each _1/_2 pair into a single column. The measure columns are
# selected by name rather than by position so the call keeps working if
# the columns of wide_df are reordered. Each element of `varying` pairs
# with the entry of `v.names` at the same position.
dl <- reshape(
  data = wide_df,
  idvar = "participant_id",
  varying = list(
    judgment = c("judgment_1", "judgment_2"),
    correct = c("correct_1", "correct_2"),
    text_id = c("text_id_1", "text_id_2")
  ),
  direction = "long",
  v.names = c("judgment", "correct", "text_id"),
  sep = "_"
)

Result:

    participant_id time judgment correct text_id
A.1              A    1      -56      40       4
B.1              B    1      -23      11      10
C.1              C    1      156     -56       1
D.1              D    1        7     179      12
E.1              E    1       13      50       7
F.1              F    1      172    -197      11
G.1              G    1       46      70       9
H.1              H    1     -127     -47       2
I.1              I    1      -69    -107       8
J.1              J    1      -45     -22       3
K.1              K    1      122    -103       5
L.1              L    1       36     -73       6
A.2              A    2       43    -127      17
B.2              B    2      -30     217      14
C.2              C    2       90     121      22
D.2              D    2       88    -112      15
E.2              E    2       82     -40      13
F.2              F    2       69     -47      19
G.2              G    2       55      78      24
H.2              H    2       -6      -8      20
I.2              I    2      -31      25      21
J.2              J    2      -38      -3      16
K.2              K    2      -69      -4      23
L.2              L    2      -21     137      18

Reshaping data to long format with multiple variables as measure.vars

reshape function is good here.

# Columns 3 and 6 are stacked into Col_1, columns 4 and 7 into Col_2 and
# columns 5 and 8 into Col_3; the `time` column takes the labels "A" and "B".
reshape(
  df,
  direction = "long",
  varying = list(c(3, 6), c(4, 7), c(5, 8)),
  v.names = paste0("Col_", 1:3),
  times = c("A", "B")
)

data

# Example wide-format input with three rows: Person (factor) and Age,
# followed by two groups of three measures, ColA_1..ColA_3 and
# ColB_1..ColB_3.
df <- 
structure(list(Person = structure(1:3, .Label = c("Andrew", "John", 
"Mike"), class = "factor"), Age = c(25, 34, 21), ColA_1 = c(1, 
5, 7), ColA_2 = c(5, 0, 9), ColA_3 = c(4, 4, 1), ColB_1 = c(16, 
55, 37), ColB_2 = c(25, 14, 39), ColB_3 = c(43, 64, 31)), .Names = c("Person", 
"Age", "ColA_1", "ColA_2", "ColA_3", "ColB_1", "ColB_2", "ColB_3"
), row.names = c(NA, -3L), class = "data.frame")

Reshape data from long to wide with multiple measure columns using spread() or other reshape functions

A tidyr solution below. You need to gather the region into a single column to be able to spread it.

library(tidyr)
# Collect every column except `age` into region/val pairs, then spread the
# `age` values back out as columns, giving one row per region.
# NOTE(review): gather()/spread() are superseded in tidyr by
# pivot_longer()/pivot_wider().
data %>%
  gather(key = region, value = val, -age) %>%
  spread(key = age, value = val)

#   region age 0 age 1 age 10 age 11 age 12 age 2 age 3 age 4 age 5 age 6 age 7 age 8 age 9
# 1     X1     2     2      6      3      3     2     4     7    12    19    22    18    11
# 2     X2     2     2      7      4      3     3     4     8    14    21    24    20    12

Reshaping multiple sets of measurement columns (wide format) into single columns (long format)

Reshaping from wide to long format with multiple value/measure columns is possible with the function pivot_longer() of the tidyr package since version 1.0.0.

This is superior to the previous tidyr strategy of gather() then spread() (see answer by @AndrewMacDonald), because the attributes are no longer dropped (dates remain dates and numerics remain numerics in the example below).

library("tidyr")
library("magrittr")

# Example one-row tibble in wide format: an ID plus three groups of columns
# (DateRange<N>Start, DateRange<N>End, Value<N>) for N = 1, 2, 3.
# The dates are given as numbers carrying the "Date" class.
a <- structure(list(ID = 1L, 
                    DateRange1Start = structure(7305, class = "Date"), 
                    DateRange1End = structure(7307, class = "Date"), 
                    Value1 = 4.4, 
                    DateRange2Start = structure(7793, class = "Date"),
                    DateRange2End = structure(7856, class = "Date"), 
                    Value2 = 6.2, 
                    DateRange3Start = structure(9255, class = "Date"), 
                    DateRange3End = structure(9653, class = "Date"), 
                    Value3 = 3.3),
               row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"))

pivot_longer() (counterpart: pivot_wider()) works similarly to gather().
However, it offers additional functionality such as multiple value columns.
With only one value column, all colnames of the wide data set would go into one long column with the name given in names_to.
For multiple value columns, names_to may receive multiple new names.

This is easiest if all column names follow a specific pattern like Start_1, End_1, Start_2, etc.
Therefore, I renamed the columns in the first step.

# Move the digit embedded in each column name to the end, after an
# underscore (e.g. "DateRange1Start" -> "DateRangeStart_1"); names without
# a digit, such as "ID", are left alone. Wrapping the assignment in
# parentheses also prints the new names.
(names(a) <- sub(
  pattern = "(\\d)(\\w*)",
  replacement = "\\2_\\1",
  x = names(a)
))
#>  [1] "ID"               "DateRangeStart_1" "DateRangeEnd_1"  
#>  [4] "Value_1"          "DateRangeStart_2" "DateRangeEnd_2"  
#>  [7] "Value_2"          "DateRangeStart_3" "DateRangeEnd_3"  
#> [10] "Value_3"

# Reshape to long form: each column name is split at "_". The part before
# it names the output value column (the ".value" sentinel) and the part
# after it is stored in `group`.
pivot_longer(
  data = a,
  cols = -ID,
  names_to = c(".value", "group"),
  # names_prefix = "DateRange",
  names_sep = "_"
)
#> # A tibble: 3 x 5
#>      ID group DateRangeEnd DateRangeStart Value
#>   <int> <chr> <date>       <date>         <dbl>
#> 1     1 1     1990-01-03   1990-01-01       4.4
#> 2     1 2     1991-07-06   1991-05-04       6.2
#> 3     1 3     1996-06-06   1995-05-05       3.3

Alternatively, the reshape may be done using a pivot spec that offers finer control (see link below):

# Start from the default spec for all non-ID columns, then derive `group`
# (the number found in each column name) and `.value` (whichever of
# "Start", "End" or "Value" the name contains).
spec <- build_longer_spec(a, cols = -ID)
spec <- dplyr::transmute(
  spec,
  .name = .name,
  group = readr::parse_number(name),
  .value = stringr::str_extract(name, "Start|End|Value")
)

# Reshape according to the hand-built spec.
pivot_longer(a, spec = spec)

^{Created on 2019-03-26 by the reprex package (v0.2.1)}

See also: https://tidyr.tidyverse.org/articles/pivot.html

Reshaping Data with Multiple Sets of Column Variables

Just using base R functions, you can do

# Stack name1..name3 into `name` and age1..age3 into `age`, then drop the
# rows whose name is missing and remove the `time` and `id` columns.
subset(
  reshape(
    df,
    varying = list(paste0("name", 1:3), paste0("age", 1:3)),
    v.names = c("name", "age"),
    direction = "long"
  ),
  subset = !is.na(name),
  select = -c(time, id)
)

to get

            city state name age
1.1     New York    NY  Tim  40
1.2     New York    NY  Bob  30
2.2 Philadelphia    PA  Jim  29
3.2      Chicago    IL Bill  34
3.3      Chicago    IL Jeff  27

Convert data from long format to wide format with multiple measure columns

In order to handle multiple variables like you want, you need to melt the data you have before casting it.

library("reshape2")

# Melt to one row per (ID, TIME, variable), then cast back out with one
# column for every variable/TIME combination.
dcast(
  melt(my.df, id.vars = c("ID", "TIME")),
  formula = ID ~ variable + TIME
)

which gives

  ID X_1 X_2 X_3 X_4 X_5 Y_1 Y_2 Y_3 Y_4 Y_5
1  A   1   4   7  10  13  16  19  22  25  28
2  B   2   5   8  11  14  17  20  23  26  29
3  C   3   6   9  12  15  18  21  24  27  30

EDIT based on comment:

The data frame

num.id <- 10
num.time <- 10
# Y is twice as long as the other columns, so data.frame() recycles ID,
# TIME and X and every ID/TIME pair ends up on two rows.
my.df <- data.frame(
  ID = rep(LETTERS[1:num.id], times = num.time),
  TIME = rep(1:num.time, each = num.id),
  X = 1:(num.id * num.time),
  Y = (num.id * num.time) + 1:(2 * length(1:(num.id * num.time)))
)

gives a different result (all entries are 2) because the ID/TIME combination does not identify a unique row. In fact, there are two rows for each ID/TIME combination. reshape2 assumes a single value for each possible combination of the variables and will apply a summary function to produce a single value if there are multiple entries. That is why there is the warning

Aggregation function missing: defaulting to length

You can get something that works if you add another variable which breaks that redundancy.

# Label the first and second half of the rows as cycle 1 and cycle 2, then
# melt and cast with `cycle` as an additional id variable.
my.df$cycle <- rep(1:2, each = num.id * num.time)
dcast(
  melt(my.df, id.vars = c("cycle", "ID", "TIME")),
  formula = cycle + ID ~ variable + TIME
)

This works because cycle/ID/time now uniquely defines a row in my.df.

Reshaping a Data Frame with More Than One Measure Variable