Reshape from Long to Wide and Create Columns with Binary Value

Reshape from long to wide and create columns with binary value

Using reshape2 we can dcast from long to wide.

As you only want a binary outcome we can unique the data first

library(reshape2)

si <- unique(studentInfo)
dcast(si, formula = StudentID ~ Subject, fun.aggregate = length)

#  StudentID English History Maths Science
#1         1       1       0     1       1
#2         2       0       0     1       0
#3         3       0       1     0       0

Another approach using tidyr and dplyr is

library(tidyr)
library(dplyr)

studentInfo %>%
  mutate(yesno = 1) %>%
  distinct %>%
  spread(Subject, yesno, fill = 0)

#  StudentID English History Maths Science
#1         1       1       0     1       1
#2         2       0       0     1       0
#3         3       0       1     0       0

Although I'm not a fan (yet) of tidyr syntax...

reordering my reshape: long to wide with pivot_wider, different column order

It may be easier with names_glue in pivot_wider

library(dplyr)
library(tidyr)
df %>% 
   pivot_wider(names_from = Set, values_from = c(Street, State), 
       names_glue = "{tools::toTitleCase(Set)}_{.value}") %>%   
   dplyr::select(ID, Name, order(readr::parse_number(names(.)[-(1:2)])) + 2)

-output

# A tibble: 3 × 8
  ID    Name  Set1_Street Set1_State Set2_Street Set2_State Set3_Street Set3_State
  <chr> <chr> <chr>       <chr>      <chr>       <chr>      <chr>       <chr>     
1 A     mary  123 St      al         234 St      nc         <NA>        <NA>      
2 B     berry 543 St      fl         492 st      ca         231 st      md        
3 C     paul  492 st      tx         231 st      vt         <NA>        <NA>

Pandas long to wide reshape, by two variables

A simple pivot might be sufficient for your needs but this is what I did to reproduce your desired output:

df['idx'] = df.groupby('Salesman').cumcount()

Just adding a within group counter/index will get you most of the way there but the column labels will not be as you desired:

print df.pivot(index='Salesman',columns='idx')[['product','price']]

        product              price        
idx            0     1     2      0   1   2
Salesman                                   
Knut         bat  ball  wand      5   1   3
Steve        pen   NaN   NaN      2 NaN NaN

To get closer to your desired output I added the following:

df['prod_idx'] = 'product_' + df.idx.astype(str)
df['prc_idx'] = 'price_' + df.idx.astype(str)

product = df.pivot(index='Salesman',columns='prod_idx',values='product')
prc = df.pivot(index='Salesman',columns='prc_idx',values='price')

reshape = pd.concat([product,prc],axis=1)
reshape['Height'] = df.set_index('Salesman')['Height'].drop_duplicates()
print reshape

         product_0 product_1 product_2  price_0  price_1  price_2  Height
Salesman                                                                 
Knut           bat      ball      wand        5        1        3       6
Steve          pen       NaN       NaN        2      NaN      NaN       5

Edit: if you want to generalize the procedure to more variables I think you could do something like the following (although it might not be efficient enough):

df['idx'] = df.groupby('Salesman').cumcount()

tmp = []
for var in ['product','price']:
    df['tmp_idx'] = var + '_' + df.idx.astype(str)
    tmp.append(df.pivot(index='Salesman',columns='tmp_idx',values=var))

reshape = pd.concat(tmp,axis=1)

@Luke said:

I think Stata can do something like this with the reshape command.

You can but I think you also need a within group counter to get the reshape in stata to get your desired output:

     +-------------------------------------------+
     | salesman   idx   height   product   price |
     |-------------------------------------------|
  1. |     Knut     0        6       bat       5 |
  2. |     Knut     1        6      ball       1 |
  3. |     Knut     2        6      wand       3 |
  4. |    Steve     0        5       pen       2 |
     +-------------------------------------------+

If you add idx then you could do reshape in stata:

reshape wide product price, i(salesman) j(idx)

Reshape categorical data in columns to binary data in rows using R

There is probably a shorter solution for this, but as a quick fix:

library(stringr)
library(dplyr)
library(reshape2)

#initial melt
df2 <- melt(df[-6], id.vars='team')
#split the variable column to get the 'away and home' column
df2 <- cbind(df2, str_split_fixed(df2$variable, "\\.", 2))
#create the wins column
df2$win <- ifelse(df2$`1` == 'wins',1,0)
#replicate the number of rows according to value
df2 <- df2[rep(1:nrow(df2), df2$value),]
#remove unnecessary columns
df2$variable <- df2$value <- df2$`1` <- NULL
#final group by to add the game column
df2 %>% group_by(team) %>% mutate(game = 1:n()) %>% arrange(team)

Output:

Source: local data frame [18 x 4]
Groups: team [4]

     team      2   win  game
   (fctr) (fctr) (dbl) (int)
1       a   away     1     1
2       a   away     0     2
3       a   away     0     3
4       b   home     1     1
5       b   away     1     2
6       b   home     0     3
7       b   away     0     4
#...and so on

From long (with multiple categories) to wide format

We can do this by splitting the 'type' at the delimiter with separate_rows, create an column of 1s. and 'spread` it to 'wide' format

library(tidyverse)
dataset_long %>% 
    separate_rows(type) %>% 
    mutate(type = paste0('type_', type)) %>% 
    mutate(ind = 1) %>% 
    spread(type, ind, fill = 0)
#  id type_A type_B type_C
#1  1      1      0      0
#2  2      1      1      0
#3  3      1      0      1
#4  4      0      0      1
#5  5      1      0      0

Or using strsplit and table from base R

table(stack(setNames(strsplit(as.character(dataset_long$type),
        ', '),  dataset_long$id))[2:1])

Or in a single line with mtabulate

library(qdapTools)
mtabulate(setNames(strsplit(as.character(dataset_long$type), ', '), dataset_long$id))

Reshape multiple categorical variables to binary response variables

How much spice is too much? Here is a solution via tidyr:

library(dplyr)
library(tidyr)

mydata %>%
  gather(actor,name,starts_with("actor")) %>%
  mutate(present = 1) %>%
  select(-actor) %>%
  spread(name,present,fill = 0)

       movie Jack Kate Leo
 1 Departed    1    0   1
 2  Titanic    0    1   1

reshape wide long with key specifying levels of grouping variable

We can use pivot_longer

library(dplyr)
library(tidyr)
library(stringr)
v1 <- c("part.1", "part.2", "part.3")
pat <- sprintf("^(outcome\\d*).*(%s).*$", str_c(v1, collapse="|"))
df1 %>% 
   pivot_longer(cols = starts_with('outcome'), 
    names_to = c(".value", "strata"), 
     names_pattern = pat)

-output

# A tibble: 21 × 5
   group technique  strata outcome1 outcome2
   <chr> <chr>      <chr>     <int>    <int>
 1 A     "attack"   part.1       24       14
 2 A     "attack"   part.2       52      234
 3 A     "attack"   part.3       14      234
 4 B     "defenese" part.1     1234      234
 5 B     "defenese" part.2      321      234
 6 B     "defenese" part.3       23      234
 7 A     "attack "  part.1       15      145
 8 A     "attack "  part.2       23      234
 9 A     "attack "  part.3        3      234
10 B     "defense " part.1      234        4
# … with 11 more rows

NOTE: There was a typo in the column names i.e. third column should be

names(df1)[3] <- 'outcome1.part.1'

Convert Duplicated data from long to wide, while preserving key-value pairs in R

Two pivots do it:

library(dplyr)
library(tidyr) # pivot_*
df2 %>%
  pivot_longer(-id, names_pattern = "(.*?)([0-9]+)", names_to = c(".value", "iter")) %>%
  group_by(id) %>%
  mutate(iter = row_number()) %>%
  pivot_wider(id, names_from = "iter", values_from = c("key", "value"), names_sep = "") %>%
  ungroup()
# # A tibble: 3 x 31
#      id key1  key2  key3  key4  key5  key6  key7  key8  key9  key10 key11 key12 key13 key14 key15 value1 value2 value3 value4 value5 value6 value7 value8 value9 value10 value11 value12 value13 value14 value15
#   <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
# 1     1 <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>      23     45     89    786     10     NA     NA     NA     NA      NA      NA      NA      NA      NA      NA
# 2     2 UJD02 JFF00 <NA>  <NA>  <NA>  UJD05 <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>      NA     NA     NA    670     NA     11     25     89     12      51      NA      NA      NA      NA      NA
# 3     3 UJD05 TPX10 DV071 <NA>  <NA>  KFC10 DR036 UGC12 UEN05 XXA00 DR036 UJD05 JFF00 TPX10 <NA>      45     23    103    987     29     67     NA   6700     67      15       1      78      50      14       3

Reshape from Long to Wide and Create Columns with Binary Value