How to Divide Between Groups of Rows Using Dplyr

How to divide between groups of rows using dplyr?

Try:

# Per name: the condition "B" value expressed as a multiple of the
# condition "A" value.
# NOTE(review): presumably every name has exactly one "A" and one "B" row --
# confirm against the data.
x %>%
  group_by(name) %>%
  summarise(
    value = value[condition == "B"] / value[condition == "A"]
  )

Which gives:

#Source: local data frame [4 x 2]
#
# name value
# (fctr) (dbl)
#1 a 5
#2 b 5
#3 c 5
#4 d 5

How to divide between groups of rows using dplyr

You could remove the groups that do not contain both "A" and "B", and then divide.

library(dplyr)

# Keep only the names that contain both an "A" and a "B" row, then take the
# B / A ratio within each remaining name.
x %>%
  group_by(name) %>%
  filter(all(c("A", "B") %in% condition)) %>%
  summarise(
    value = value[condition == "B"] / value[condition == "A"]
  )

# name value
# <fct> <dbl>
#1 a 5
#2 d 5

How to divide between groups of rows using dplyr with multiple columns?

I figured out a way to do it.

# Reshape every value column to long form, take the B / A ratio for each
# (variable, name) pair, then spread the variables back out into columns.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread().
x %>%
  tidyr::pivot_longer(
    cols = -(name:condition),
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(variable, name) %>%
  summarise(value = value[condition == "B"] / value[condition == "A"]) %>%
  # drop the remaining grouping before widening on the former group column
  ungroup() %>%
  tidyr::pivot_wider(names_from = variable, values_from = value)

# name value1 value2
# <fct> <dbl> <dbl>
# 1 a 5 5
# 2 b 5 5
# 3 c 5 5
# 4 d 5 5

How to divide variables between groups of rows using dplyr without listing them?

We can use summarise_at to apply the same operation to many columns.

library(dplyr)

# For every id, divide the value from the first condition by the value from
# the last condition (rows are sorted by condition beforehand). This is applied
# to every column except condition and the grouping column.
# funs() is deprecated since dplyr 0.8.0; a formula lambda is the supported
# way to pass an inline function to summarise_at().
df2 <- df %>%
  group_by(id) %>%
  arrange(condition) %>%
  summarise_at(vars(-condition), ~ first(.) / last(.)) %>%
  ungroup()
df2
# # A tibble: 3 x 4
# id gene1 gene2 geneN
# <fct> <dbl> <dbl> <dbl>
# 1 a 0.524 2.28 0.654
# 2 b 1.65 0.616 1.38
# 3 c 0.578 2.00 2.17

How to divide combinations of rows using dplyr or another method in R?

You could use reshape2 to get the data in a format that is easier to work with.

The code below separates out the sp.1 and sp.2 data. acast is used so that each dataframe consists of a single row per site, and each column is a unique sample with the values being from sp.1 and sp.2.

Name the columns something unique and combine the dataframes with cbind.

Now each column can be compared based on your requirements.

library(dplyr)
library(reshape2)

## your setup
## NOTE: the variable `rep` below shares its name with base::rep(). Calls such
## as rep(...) still resolve to the function; the name is kept because it
## becomes the `rep` column of df.dummy.
site <- rep(1:4, each = 8, length.out = 32)
rep <- rep(1:8, times = 4, length.out = 32)
treatment <- rep(
  c("A.low", "A.low", "A.high", "A.high", "A.mix", "A.mix", "B.mix", "B.mix"),
  4
)
sp.1 <- sample(0:3, size = 32, replace = TRUE)
sp.2 <- sample(0:2, size = 32, replace = TRUE)
df.dummy <- data.frame(site, rep, treatment, sp.1, sp.2)

## create unique ids and create a data frame containing 1 value column
sp1 <- df.dummy %>%
  mutate(id = paste(rep, treatment, sep = "_")) %>%
  select(id, site, rep, treatment, sp.1)
sp2 <- df.dummy %>%
  mutate(id = paste(rep, treatment, sep = "_")) %>%
  select(id, site, rep, treatment, sp.2)

## reshape the data so that each treatment and replicate is assigned a single
## column
## each row will be a single site
## each column will contain the values from sp.1 or sp.2
## value.var is given explicitly so acast() does not have to guess it
sp1 <- reshape2::acast(data = sp1, formula = site ~ id, value.var = "sp.1")
sp2 <- reshape2::acast(data = sp2, formula = site ~ id, value.var = "sp.2")

## rename columns something sensible and unique
## (relies on acast() returning the id columns in sorted order:
## 1_A.low, 2_A.low, 3_A.high, 4_A.high, 5_A.mix, 6_A.mix, 7_B.mix, 8_B.mix)
colnames(sp1) <- c("low.1.sp1", "low.2.sp1", "high.3.sp1", "high.4.sp1",
                   "mix.5.sp1", "mix.6.sp1", "mix.7.sp1", "mix.8.sp1")
colnames(sp2) <- c("low.1.sp2", "low.2.sp2", "high.3.sp2", "high.4.sp2",
                   "mix.5.sp2", "mix.6.sp2", "mix.7.sp2", "mix.8.sp2")

## combine datasets
## acast() returns matrices and mutate() has no method for a matrix, so the
## combined result is converted to a data frame before any dplyr verb is used
dat <- as.data.frame(cbind(sp1, sp2))

## choose which columns to compare. Some examples shown below
## the sampled counts can be 0, so a ratio may come out as Inf or NaN
dat <- dat %>%
  mutate(
    low.1.sp1_vs_high.3.sp1 = low.1.sp1 / high.3.sp1,
    low.1.sp1_vs_high.4.sp1 = low.1.sp1 / high.4.sp1,
    low.2.sp1_vs_high.3.sp2 = low.2.sp1 / high.3.sp2
  )

R dplyr divide between two rows after group by

You can do:

# Per name: divide the last value by the first value.
# The result depends on the row order inside each name.
x %>%
  group_by(name) %>%
  summarise(
    value = last(value) / first(value)
  )

name value
<fct> <dbl>
1 a 5
2 b 5
3 c 5
4 d 5

Use `dplyr` to divide rows by group

Here's a total dplyr approach:

library(dplyr) #version >= 1.0.0
# Build one "total" row per treatment (summed decrease plus an offset of
# 10, 20, ..., 80), stack the original rows underneath, then express every
# decrease as a fraction of its treatment's "total" row.
OrchardSprays %>%
  group_by(treatment) %>%
  summarise(decrease = sum(decrease)) %>%
  mutate(
    decrease = decrease + seq(10, 80, 10),
    rowpos = "total",
    colpos = "total"
  ) %>%
  bind_rows(
    # rowpos/colpos are converted to character so they can share a column
    # with the "total" marker
    OrchardSprays %>% mutate(across(rowpos:colpos, as.character))
  ) %>%
  group_by(treatment) %>%
  mutate(treatment_decrease = decrease / decrease[rowpos == "total"])
# A tibble: 72 x 5
# Groups: treatment [8]
treatment decrease rowpos colpos treatment_decrease
<fct> <dbl> <chr> <chr> <dbl>
1 A 47 total total 1
2 B 81 total total 1
3 C 232 total total 1
4 D 320 total total 1
5 E 555 total total 1
6 F 612 total total 1
7 G 618 total total 1
8 H 802 total total 1
9 D 57 1 1 0.178
10 E 95 2 1 0.171
# … with 62 more rows

Divide between groups of rows using group_by

We could first calculate the sum of 'value' for each Group within each SYMBOL, and then divide the 'MostUp' sum by the 'LessUp' sum.

library(dplyr)

# Sum value per (SYMBOL, variable, Sample, IDs, Group). The first summarise()
# drops the innermost grouping level (Group), so the second summarise() runs
# per (SYMBOL, variable, Sample, IDs) and divides the MostUp sum by the
# LessUp sum.
df %>%
  group_by(SYMBOL, variable, Sample, IDs, Group) %>%
  summarise(value = sum(value)) %>%
  summarise(
    value = value[Group == "MostUp"] / value[Group == "LessUp"]
  )

# SYMBOL variable Sample IDs value
# <fct> <fct> <fct> <int> <dbl>
#1 TLR4 MMRF_2613_1_BM Baseline 2613 1.29
#2 TLR8 MMRF_2613_1_BM Baseline 2613 0.585

To run a t-test between the two groups, we can do:

# One t-test (MostUp vs LessUp values) per (SYMBOL, variable, Sample, IDs).
# Each htest result is wrapped in list() so it can be stored in a list-column.
df1 <- df %>%
  group_by(SYMBOL, variable, Sample, IDs) %>%
  summarise(
    value = list(
      t.test(value[Group == "MostUp"], value[Group == "LessUp"])
    )
  )

df1
# A tibble: 2 x 5
# Groups: SYMBOL, variable, Sample [2]
# SYMBOL variable Sample IDs value
# <fct> <fct> <fct> <int> <list>
#1 TLR4 MMRF_2613_1_BM Baseline 2613 <htest>
#2 TLR8 MMRF_2613_1_BM Baseline 2613 <htest>

data

# Example data: 8 rows, two symbols (TLR8 then TLR4), one sample, and two
# Group levels with two replicate values each.
df <- data.frame(
  SYMBOL = factor(
    rep(c("TLR8", "TLR4"), each = 4),
    levels = c("TLR4", "TLR8")
  ),
  variable = factor(rep("MMRF_2613_1_BM", 8)),
  value = c(
    3.186233, 5.471014, 2.917965, 2.147028,
    7.497424, 4.16523, 7.136523, 7.96523
  ),
  Sample = factor(rep("Baseline", 8)),
  IDs = rep(2613L, 8),
  Group = factor(
    rep(c("LessUp", "LessUp", "MostUp", "MostUp"), times = 2),
    levels = c("LessUp", "MostUp")
  )
)

Iterative dividing by specific row for grouped dplyr data in R

Assuming that the level is not duplicated within 'group', after grouping by 'group', extract the 'value' that corresponds to the 'c' level (value[level == 'c']) and use that to divide the 'value' column

library(dplyr)
# Within each group, divide every value by the value of the level "c" row.
# Assumes exactly one "c" row per group.
d %>%
  group_by(group) %>%
  mutate(
    value = value / value[level == "c"]
  )
# A tibble: 12 x 3
# Groups: group [4]
# group level value
# <chr> <chr> <dbl>
# 1 blue a 1.67
# 2 blue b 1.33
# 3 blue c 1
# 4 red a 5
# 5 red b 4
# 6 red c 1
# 7 yellow a 6
# 8 yellow b 5
# 9 yellow c 1
#10 green a 2.75
#11 green b 2.25
#12 green c 1

Or if there are multiple 'c', then use match to get the index of first occurrence of 'c'

# Within each group, divide every value by the value of the first "c" row;
# match() returns the position of the first occurrence only.
d %>%
  group_by(group) %>%
  mutate(
    value = value / value[match("c", level)]
  )

Or using base R

# Named vector of the level "c" values (names = group), used as a lookup
# table so every row is divided by the baseline of its own group.
baseline_by_group <- with(subset(d, level == "c"), setNames(value, group))
d$value <- d$value / baseline_by_group[d$group]

data

# Example data: four groups, each with levels a, b and c. The tibble classes
# are set by hand so the object prints as a tibble when tibble/dplyr is loaded.
d <- data.frame(
  group = rep(c("blue", "red", "yellow", "green"), each = 3),
  level = rep(c("a", "b", "c"), times = 4),
  value = c(5, 4, 3, 10, 8, 2, 6, 5, 1, 11, 9, 4),
  stringsAsFactors = FALSE
)
class(d) <- c("tbl_df", "tbl", "data.frame")

Dividing rows by rows using dplyr and tidyr

You can use the following :

# Per Block, divide the CF and LR rows of every numeric column by the Control
# row. The names of the function list become the _CF / _LR column suffixes.
df %>%
  dplyr::group_by(Block) %>%
  dplyr::summarise(
    across(
      where(is.numeric),
      list(
        CF = ~ .x[Treatment == "CF"] / .x[Treatment == "Control"],
        LR = ~ .x[Treatment == "LR"] / .x[Treatment == "Control"]
      )
    )
  )

# Block var1_CF var1_LR var2_CF var2_LR
# <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 1.22 0.556 Inf Inf
#2 2 0.75 0.917 0 6.25
#3 3 0.867 0.333 0 0
#4 4 0.688 0.688 3.33 0

If you want the output to follow the same format as shown in your expected output you can use pivot_longer.

# Same per-Block ratios to Control as above, then reshaped so each
# Block/Treatment pair is one row: the part of each column name before "_"
# stays a column (.value) and the part after it becomes Treatment.
df %>%
  dplyr::group_by(Block) %>%
  dplyr::summarise(
    across(
      where(is.numeric),
      list(
        CF = ~ .x[Treatment == "CF"] / .x[Treatment == "Control"],
        LR = ~ .x[Treatment == "LR"] / .x[Treatment == "Control"]
      )
    )
  ) %>%
  tidyr::pivot_longer(
    cols = -Block,
    names_to = c(".value", "Treatment"),
    names_sep = "_"
  )

# Block Treatment var1 var2
# <dbl> <chr> <dbl> <dbl>
#1 1 CF 1.22 Inf
#2 1 LR 0.556 Inf
#3 2 CF 0.75 0
#4 2 LR 0.917 6.25
#5 3 CF 0.867 0
#6 3 LR 0.333 0
#7 4 CF 0.688 3.33
#8 4 LR 0.688 0


Related Topics



Leave a reply



Submit