Transpose Only Certain Columns in Data.Frame

Transpose only few columns and rows from a dataframe

melt

df.melt(['Type', 'Class'], var_name='Date', value_name='Value')

Type Class Date Value
0 Plant1 Plant 24/01/2018 5498
1 Plant2 Plant 24/01/2018 3416
2 Veg1 Vegetable 24/01/2018 5426
3 Veg2 Vegetable 24/01/2018 5317
4 Plant1 Plant 25/01/2018 5998
5 Plant2 Plant 25/01/2018 4844
6 Veg1 Vegetable 25/01/2018 5464
7 Veg2 Vegetable 25/01/2018 4616
8 Plant1 Plant 26/01/2018 5815
9 Plant2 Plant 26/01/2018 4991
10 Veg1 Vegetable 26/01/2018 3756
11 Veg2 Vegetable 26/01/2018 7528


set_index + stack

(df.set_index(['Type', 'Class'])
.stack()
.rename_axis(['Type', 'Class', 'Date'])
.reset_index(name='Value')
)

Type Class Date Value
0 Plant1 Plant 24/01/2018 5498
1 Plant1 Plant 25/01/2018 5998
2 Plant1 Plant 26/01/2018 5815
3 Plant2 Plant 24/01/2018 3416
4 Plant2 Plant 25/01/2018 4844
5 Plant2 Plant 26/01/2018 4991
6 Veg1 Vegetable 24/01/2018 5426
7 Veg1 Vegetable 25/01/2018 5464
8 Veg1 Vegetable 26/01/2018 3756
9 Veg2 Vegetable 24/01/2018 5317
10 Veg2 Vegetable 25/01/2018 4616
11 Veg2 Vegetable 26/01/2018 7528

Transpose only certain columns in data.frame

The basic idea would be to go to a "long" format first, and then go into a "wide" format.

Here are a few ways to do this....

melt + dcast

library(data.table) ## or library(reshape2)
dcast(melt(as.data.table(mydf), id.vars = c("am", "group")),
group + variable ~ am, value.var = "value")

recast

(This is basically the same as melt+cast, but in one step.)

library(reshape2)
recast(mydf, group + variable ~ am, id.var = c("am", "group"))

gather + spread

library(dplyr)
library(tidyr)

mydf %>%
gather(key, value, v1:v4) %>%
spread(am, value)

pivot_longer + pivot_wider

library(dplyr)
library(tidyr)

dd %>%
mutate(across(v1:v4, as.character)) %>%
pivot_longer(names_to = "metric", cols=v1:v4) %>%
pivot_wider(id_cols = c(group, metric), values_from = value, names_from=am)

reshape

reshape(cbind(mydf[c(1, 2)], stack(mydf[-c(1, 2)])), 
direction = "wide", idvar = c("group", "ind"), timevar = "am")

Transpose specific columns in data frame in R

We could use pivot_wider

library(tidyr)
pivot_wider(df_in, names_from = trans, values_from = score)
# A tibble: 2 x 8
GENE ID tra1 tr2 tr3 tra2 tr15 tr1b
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ENS777 1122O 1.2 0 0 1.6 3.3 0
2 ENS999 1122O 0 0 0 1.1 2.3 0

data

df_in <- structure(list(GENE = c("ENS777", "ENS777", "ENS777", "ENS777", 
"ENS777", "ENS777", "ENS999", "ENS999", "ENS999", "ENS999", "ENS999",
"ENS999"), ID = c("1122O", "1122O", "1122O", "1122O", "1122O",
"1122O", "1122O", "1122O", "1122O", "1122O", "1122O", "1122O"
), trans = c("tra1", "tr2", "tr3", "tra2", "tr15", "tr1b", "tra1",
"tr2", "tr3", "tra2", "tr15", "tr1b"), score = c(1.2, 0, 0, 1.6,
3.3, 0, 0, 0, 0, 1.1, 2.3, 0)), class = "data.frame", row.names = c(NA,
-12L))

how to transpose only part of a dataframe or interchange rows and columns?

You can first set ['letter', 'branch'] as index and then use stack().

(
df.set_index(['letter', 'branch']).T
.stack()
.reset_index(level=0)
.sort_index()
.rename(columns={'level_0':'category'})
)

letter category a b
branch
A var1 1 3
A var2 2 4
A var3 4 5
B var1 6 2
B var2 7 3
B var3 9 8

Transpose only certain columns - data formating

We can use pivot_longer

library(tidyr)
pivot_longer(df, cols = -year,
names_to = 'company', values_to = 'Variable')

-output

# A tibble: 8 × 3
year company Variable
<dbl> <chr> <dbl>
1 2010 A 10
2 2010 B 11
3 2011 A 11
4 2011 B 12
5 2010 A 10
6 2010 B 11
7 2011 A 11
8 2011 B 12

How to transpose certain set of columns

You can use gather from the tidyr package, and arrange from dplyr:

library(dplyr)
library(tidyr)
old_df= data.frame(
id=1:3,
DATE=c('20/12/1965','11/10/1970','03/12/1985'),
TRT=c(1,2,1),
AT1=c(40,89,50),
AT2=c(18,43,13),
AT3=c(23,16,10),
BECT1=c(80,50,60),
BECT2=c(78,40,48),
BECT3=c(60,97,56),
C0T1=c(43,45,78),
C0T2=c(42,NA,48))

new_df <- old_df %>%
gather(AT1, AT2, AT3, key = "variable1", value = "value1") %>%
gather(BECT1, BECT2, BECT3, key = "variable2", value = "value2") %>%
gather(C0T1, C0T2, key = "variable3", value = "value3") %>%
arrange(id, DATE, TRT, variable1, variable2, variable3)

id DATE TRT variable1 value1 variable2 value2 variable3 value3
1 1 20/12/1965 1 AT1 40 BECT1 80 C0T1 43
2 1 20/12/1965 1 AT1 40 BECT1 80 C0T2 42
3 1 20/12/1965 1 AT1 40 BECT2 78 C0T1 43
4 1 20/12/1965 1 AT1 40 BECT2 78 C0T2 42
5 1 20/12/1965 1 AT1 40 BECT3 60 C0T1 43
6 1 20/12/1965 1 AT1 40 BECT3 60 C0T2 42
7 1 20/12/1965 1 AT2 18 BECT1 80 C0T1 43
8 1 20/12/1965 1 AT2 18 BECT1 80 C0T2 42
9 1 20/12/1965 1 AT2 18 BECT2 78 C0T1 43
10 1 20/12/1965 1 AT2 18 BECT2 78 C0T2 42
11 1 20/12/1965 1 AT2 18 BECT3 60 C0T1 43
12 1 20/12/1965 1 AT2 18 BECT3 60 C0T2 42
13 1 20/12/1965 1 AT3 23 BECT1 80 C0T1 43
14 1 20/12/1965 1 AT3 23 BECT1 80 C0T2 42
15 1 20/12/1965 1 AT3 23 BECT2 78 C0T1 43
16 1 20/12/1965 1 AT3 23 BECT2 78 C0T2 42
17 1 20/12/1965 1 AT3 23 BECT3 60 C0T1 43
18 1 20/12/1965 1 AT3 23 BECT3 60 C0T2 42
19 2 11/10/1970 2 AT1 89 BECT1 50 C0T1 45
20 2 11/10/1970 2 AT1 89 BECT1 50 C0T2 NA
21 2 11/10/1970 2 AT1 89 BECT2 40 C0T1 45
22 2 11/10/1970 2 AT1 89 BECT2 40 C0T2 NA
23 2 11/10/1970 2 AT1 89 BECT3 97 C0T1 45
24 2 11/10/1970 2 AT1 89 BECT3 97 C0T2 NA
25 2 11/10/1970 2 AT2 43 BECT1 50 C0T1 45
26 2 11/10/1970 2 AT2 43 BECT1 50 C0T2 NA
27 2 11/10/1970 2 AT2 43 BECT2 40 C0T1 45
28 2 11/10/1970 2 AT2 43 BECT2 40 C0T2 NA
29 2 11/10/1970 2 AT2 43 BECT3 97 C0T1 45
30 2 11/10/1970 2 AT2 43 BECT3 97 C0T2 NA
31 2 11/10/1970 2 AT3 16 BECT1 50 C0T1 45
32 2 11/10/1970 2 AT3 16 BECT1 50 C0T2 NA
33 2 11/10/1970 2 AT3 16 BECT2 40 C0T1 45
34 2 11/10/1970 2 AT3 16 BECT2 40 C0T2 NA
35 2 11/10/1970 2 AT3 16 BECT3 97 C0T1 45
36 2 11/10/1970 2 AT3 16 BECT3 97 C0T2 NA
37 3 03/12/1985 1 AT1 50 BECT1 60 C0T1 78
38 3 03/12/1985 1 AT1 50 BECT1 60 C0T2 48
39 3 03/12/1985 1 AT1 50 BECT2 48 C0T1 78
40 3 03/12/1985 1 AT1 50 BECT2 48 C0T2 48
41 3 03/12/1985 1 AT1 50 BECT3 56 C0T1 78
42 3 03/12/1985 1 AT1 50 BECT3 56 C0T2 48
43 3 03/12/1985 1 AT2 13 BECT1 60 C0T1 78
44 3 03/12/1985 1 AT2 13 BECT1 60 C0T2 48
45 3 03/12/1985 1 AT2 13 BECT2 48 C0T1 78
46 3 03/12/1985 1 AT2 13 BECT2 48 C0T2 48
47 3 03/12/1985 1 AT2 13 BECT3 56 C0T1 78
48 3 03/12/1985 1 AT2 13 BECT3 56 C0T2 48
49 3 03/12/1985 1 AT3 10 BECT1 60 C0T1 78
50 3 03/12/1985 1 AT3 10 BECT1 60 C0T2 48
51 3 03/12/1985 1 AT3 10 BECT2 48 C0T1 78
52 3 03/12/1985 1 AT3 10 BECT2 48 C0T2 48
53 3 03/12/1985 1 AT3 10 BECT3 56 C0T1 78
54 3 03/12/1985 1 AT3 10 BECT3 56 C0T2 48

You can read more about gathering in section 12.3.1 of Hadley Wickham's r4ds book

Pandas - transpose one column

Since you aren't performing an aggregation, pd.DataFrame.pivot should be preferred to groupby / pivot_table:

res = df.pivot(index='date', columns='name', values='quantity')

print(res)

name A B C
date
1/1/2018 5 6 7
1/2/2018 9 8 6

If you wish you can use reset_index to elevate date to a column.

Transpose specific columns

If you convert to a dataframe you can use spread from tidyr

library(tidyr)
numbers_df <- as.data.frame(numbers,stringsAsFactors=FALSE)
numbers_transpose <- numbers_df %>% spread(key = cause, value = value)


Related Topics



Leave a reply



Submit