How to Divide Between Groups of Rows Using Dplyr

How to divide between groups of rows using dplyr?

Try:

# For every `name`, divide the value of its "B" row by the value of its "A" row.
# Written as a nested call: the grouped data is the first argument of summarise().
summarise(
  group_by(x, name),
  value = value[condition == "B"] / value[condition == "A"]
)

Which gives:

#Source: local data frame [4 x 2]
#
#    name value
#  (fctr) (dbl)
#1      a     5
#2      b     5
#3      c     5
#4      d     5

How to divide between groups of rows using dplyr

You could remove the groups that do not contain both "A" and "B", and then divide.

library(dplyr)

# Keep only the `name` groups that contain both an "A" row and a "B" row,
# then divide the "B" value by the "A" value within each remaining group.
x %>%
  group_by(name) %>%
  # Each condition is a single TRUE/FALSE per group; filter() combines them
  # with AND, so a group is kept only when both conditions are present.
  filter("A" %in% condition, "B" %in% condition) %>%
  summarise(value = value[condition == "B"] / value[condition == "A"])

#  name  value
#  <fct> <dbl>
#1 a         5
#2 d         5

How to divide between groups of rows using dplyr with multiple columns?

I figured out a way to do it.

# Divide "B" by "A" for several value columns at once:
#   1. stack every column except name/condition into long form,
#   2. compute the B / A ratio per (variable, name),
#   3. spread the ratios back out so each original column gets its own ratio.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread().
x %>%
  tidyr::pivot_longer(
    cols = -(name:condition),
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(variable, name) %>%
  summarise(value = value[condition == "B"] / value[condition == "A"]) %>%
  # summarise() leaves the result grouped by `variable`; drop the grouping
  # before that column is consumed as the source of the new column names.
  ungroup() %>%
  tidyr::pivot_wider(names_from = variable, values_from = value)

#  name  value1 value2
#   <fct>  <dbl>  <dbl>
# 1 a          5      5
# 2 b          5      5
# 3 c          5      5
# 4 d          5      5

How to divide variables between groups of rows using dplyr without listing them?

We can use summarise_at to apply the same operation to many columns.

library(dplyr)

# For every `id`, divide the value in the first `condition` row by the value
# in the last `condition` row, for every column except `condition` itself.
df2 <- df %>%
  group_by(id) %>%
  # Sort by condition so that first()/last() pick rows in a defined order.
  arrange(condition) %>%
  # A formula lambda replaces funs(), which is deprecated since dplyr 0.8.0.
  summarise_at(vars(-condition), ~ first(.) / last(.)) %>%
  ungroup()
df2
# # A tibble: 3 x 4
#   id    gene1 gene2 geneN
#   <fct> <dbl> <dbl> <dbl>
# 1 a     0.524 2.28  0.654
# 2 b     1.65  0.616 1.38 
# 3 c     0.578 2.00  2.17

How to divide combinations of rows using dplyr or another method in R?

You could use reshape2 to get the data in a format that is easier to work with.

The code below separates out the sp.1 and sp.2 data. acast is used so that each dataframe consists of a single row per site, and each column is a unique sample with the values being from sp.1 and sp.2.

Name the columns something unique and combine the dataframes with cbind.

Now each column can be compared based on your requirements.

library(dplyr)
library(reshape2)

## your setup: 4 sites x 8 replicate/treatment combinations with random counts
## for two species (no seed is set, so the counts differ between runs)
site <- rep(1:4, each = 8, length.out = 32)
rep <- rep(1:8, times = 4, length.out = 32)
treatment <- rep(
  c("A.low", "A.low", "A.high", "A.high", "A.mix", "A.mix", "B.mix", "B.mix"),
  4
)
sp.1 <- sample(0:3, size = 32, replace = TRUE)
sp.2 <- sample(0:2, size = 32, replace = TRUE)
df.dummy <- data.frame(site, rep, treatment, sp.1, sp.2)

## create unique ids and create a dataframe containing 1 value column
sp1 <- df.dummy %>%
  mutate(id = paste(rep, treatment, sep = "_")) %>%
  select(id, site, rep, treatment, sp.1)
sp2 <- df.dummy %>%
  mutate(id = paste(rep, treatment, sep = "_")) %>%
  select(id, site, rep, treatment, sp.2)

## reshape the data so that each treatment and replicate is assigned a single
## column; each row will be a single site and each column will contain the
## values from sp.1 or sp.2
## value.var is given explicitly rather than left for acast() to guess
sp1 <- reshape2::acast(data = sp1, formula = site ~ id, value.var = "sp.1")
sp2 <- reshape2::acast(data = sp2, formula = site ~ id, value.var = "sp.2")

## rename columns something sensible and unique
colnames(sp1) <- c("low.1.sp1", "low.2.sp1", "high.3.sp1", "high.4.sp1",
                   "mix.5.sp1", "mix.6.sp1", "mix.7.sp1", "mix.8.sp1")
colnames(sp2) <- c("low.1.sp2", "low.2.sp2", "high.3.sp2", "high.4.sp2",
                   "mix.5.sp2", "mix.6.sp2", "mix.7.sp2", "mix.8.sp2")

## combine datasets
## acast() returns matrices, so cbind() gives a matrix; convert it to a data
## frame because mutate() has no method for matrices
dat <- as.data.frame(cbind(sp1, sp2))

## choose which columns to compare. Some examples shown below
dat <- dat %>%
  mutate(low.1.sp1 / high.3.sp1,
         low.1.sp1 / high.4.sp1,
         low.2.sp1 / high.3.sp2)

R dplyr divide between two rows after group by

You can do:

# Per `name`, divide the value in the group's last row by the value in its
# first row (relies on the rows already being in the intended order).
summarise(
  group_by(x, name),
  value = last(value) / first(value)
)

  name  value
  <fct> <dbl>
1 a         5
2 b         5
3 c         5
4 d         5

Use `dplyr` to divide rows by group

Here's a total dplyr approach:

library(dplyr) #version >= 1.0.0
# Stack one "total" row per treatment on top of the original observations,
# then express every row's decrease as a fraction of its treatment's total.
bind_rows(
  # Per-treatment totals, tagged with "total" in rowpos/colpos.
  # NOTE(review): the seq(10, 80, 10) offset is added to the eight totals;
  # its purpose is not evident from this snippet - confirm against the question.
  OrchardSprays %>%
    group_by(treatment) %>%
    summarise(decrease = sum(decrease)) %>%
    mutate(
      decrease = decrease + seq(10, 80, 10),
      rowpos = "total",
      colpos = "total"
    ),
  # Original rows; rowpos/colpos become character so they can share a column
  # with the "total" tag.
  mutate(OrchardSprays, across(rowpos:colpos, as.character))
) %>%
  group_by(treatment) %>%
  mutate(treatment_decrease = decrease / decrease[rowpos == "total"])
# A tibble: 72 x 5
# Groups:   treatment [8]
   treatment decrease rowpos colpos treatment_decrease
   <fct>        <dbl> <chr>  <chr>               <dbl>
 1 A               47 total  total               1    
 2 B               81 total  total               1    
 3 C              232 total  total               1    
 4 D              320 total  total               1    
 5 E              555 total  total               1    
 6 F              612 total  total               1    
 7 G              618 total  total               1    
 8 H              802 total  total               1    
 9 D               57 1      1                   0.178
10 E               95 2      1                   0.171
# … with 62 more rows

Divide between groups of rows using group_by

We could first calculate the sum of 'value' for each Group within each SYMBOL, and then divide the 'MostUp' sum by the 'LessUp' sum.

library(dplyr)

# Two-step summary: total `value` per Group first, then the MostUp / LessUp ratio.
df %>%
  group_by(SYMBOL, variable, Sample, IDs, Group) %>%
  # Sum `value` within each Group. This summarise() peels off the last
  # grouping level (Group), so the result is still grouped by the other keys.
  summarise(value = sum(value)) %>%
  # Within each remaining group, divide the 'MostUp' total by the 'LessUp' total.
  summarise(value = value[Group == 'MostUp']/value[Group == 'LessUp'])

#  SYMBOL variable       Sample     IDs value
#  <fct>  <fct>          <fct>    <int> <dbl>
#1 TLR4   MMRF_2613_1_BM Baseline  2613 1.29 
#2 TLR8   MMRF_2613_1_BM Baseline  2613 0.585

To run a t-test between the two groups, we can do:

# Run one two-sample t.test() per (SYMBOL, variable, Sample, IDs) group,
# comparing the 'MostUp' values against the 'LessUp' values. Each test result
# is wrapped in list() so it can be stored in a list-column.
df1 <- summarise(
  group_by(df, SYMBOL, variable, Sample, IDs),
  value = list(t.test(value[Group == "MostUp"], value[Group == "LessUp"]))
)

df1
# A tibble: 2 x 5
# Groups:   SYMBOL, variable, Sample [2]
#  SYMBOL variable       Sample     IDs value  
#  <fct>  <fct>          <fct>    <int> <list> 
#1  TLR4   MMRF_2613_1_BM Baseline  2613 <htest>
#2  TLR8   MMRF_2613_1_BM Baseline  2613 <htest>

data

# Example data: 8 rows, four for TLR8 followed by four for TLR4; within each
# SYMBOL there are two 'LessUp' rows followed by two 'MostUp' rows.
df <- data.frame(
  SYMBOL = factor(
    rep(c("TLR8", "TLR4"), each = 4),
    levels = c("TLR4", "TLR8")
  ),
  variable = factor(rep("MMRF_2613_1_BM", 8)),
  value = c(3.186233, 5.471014, 2.917965, 2.147028,
            7.497424, 4.16523, 7.136523, 7.96523),
  Sample = factor(rep("Baseline", 8)),
  IDs = rep(2613L, 8),
  Group = factor(
    rep(c("LessUp", "LessUp", "MostUp", "MostUp"), times = 2),
    levels = c("LessUp", "MostUp")
  )
)

Iterative dividing by specific row for grouped dplyr data in R

Assuming that the level is not duplicated within 'group', after grouping by 'group', extract the 'value' that corresponds to the 'c' level (value[level == 'c']) and use that to divide the 'value' column

library(dplyr)
# Within each `group`, divide every value by the value of that group's
# level "c" row (assumes exactly one "c" row per group).
mutate(
  group_by(d, group),
  value = value / value[level == "c"]
)
# A tibble: 12 x 3
# Groups:   group [4]
#   group  level value
#   <chr>  <chr> <dbl>
# 1 blue   a      1.67
# 2 blue   b      1.33
# 3 blue   c      1   
# 4 red    a      5   
# 5 red    b      4   
# 6 red    c      1   
# 7 yellow a      6   
# 8 yellow b      5   
# 9 yellow c      1   
#10 green  a      2.75
#11 green  b      2.25
#12 green  c      1

Or if there are multiple 'c', then use match to get the index of first occurrence of 'c'

# Within each `group`, divide every value by the value at the first "c" row;
# match() returns only the first position, so repeated "c" rows are fine.
mutate(
  group_by(d, group),
  value = value / value[match("c", level)]
)

Or using base R

# Base R version: build a lookup of each group's "c" value, named by group,
# then divide every row's value by the entry for its own group.
d$value <- with(d, {
  # which() keeps only rows where the comparison is TRUE, ignoring NA.
  ref_rows <- which(level == "c")
  value / setNames(value[ref_rows], group[ref_rows])[group]
})

data

# Example data: four colour groups, each with levels a, b, c and one value
# per row. The tibble classes are set by hand via structure().
d <- structure(
  list(
    group = rep(c("blue", "red", "yellow", "green"), each = 3),
    level = rep(c("a", "b", "c"), times = 4),
    value = c(5, 4, 3, 10, 8, 2, 6, 5, 1, 11, 9, 4)
  ),
  row.names = c(NA, -12L),
  class = c("tbl_df", "tbl", "data.frame")
)

Dividing rows by rows using dplyr and tidyr

You can use the following :

# Per Block, divide the "CF" and "LR" rows of every numeric column by the
# "Control" row. The names in the list become the output column suffixes.
df %>%
  dplyr::group_by(Block) %>%
  dplyr::summarise(
    across(
      where(is.numeric),
      list(
        CF = ~ .x[Treatment == "CF"] / .x[Treatment == "Control"],
        LR = ~ .x[Treatment == "LR"] / .x[Treatment == "Control"]
      )
    )
  )

#  Block var1_CF var1_LR var2_CF var2_LR
#  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#1     1   1.22    0.556  Inf     Inf   
#2     2   0.75    0.917    0       6.25
#3     3   0.867   0.333    0       0   
#4     4   0.688   0.688    3.33    0

If you want the output to follow the same format as shown in your expected output you can use pivot_longer.

# Same ratios as the wide version, then reshaped so there is one row per
# Block and Treatment and one column per original variable.
df %>%
  dplyr::group_by(Block) %>%
  dplyr::summarise(
    across(
      where(is.numeric),
      list(
        CF = ~ .x[Treatment == "CF"] / .x[Treatment == "Control"],
        LR = ~ .x[Treatment == "LR"] / .x[Treatment == "Control"]
      )
    )
  ) %>%
  # Column names are split at "_": the first part names the output column
  # (".value"), the second part becomes the Treatment label.
  # NOTE(review): assumes the original column names contain no "_" - confirm.
  tidyr::pivot_longer(
    cols = -Block,
    names_to = c(".value", "Treatment"),
    names_sep = "_"
  )

#  Block Treatment  var1   var2
#  <dbl> <chr>     <dbl>  <dbl>
#1     1 CF        1.22  Inf   
#2     1 LR        0.556 Inf   
#3     2 CF        0.75    0   
#4     2 LR        0.917   6.25
#5     3 CF        0.867   0   
#6     3 LR        0.333   0   
#7     4 CF        0.688   3.33
#8     4 LR        0.688   0