Combine Rows and Sum Their Values

Combine rows and sum their values

If you turn your data.frame into a data.table you can make great use of the by argument

library(data.table)

DT <- data.table(DF)  # DF is your original data

then it is simply one line:

DT[, lapply(.SD, sum), by=list(season, lic, id, vessel)]

We can filter just the 1998 Season, if we'd like: '

DT[, lapply(.SD, sum), by=list(season, lic, id, vessel)][season==1998]
   season   lic  id vessel qtty grossTon
1:   1998 15593 411   2643   40    31.50
2:   1998 16350 431    435   68   114.00
3:   1998 16353 431    303   68    45.08

The entire result output looks like this:

    season    lic  id vessel qtty grossTon
 1:   1998  15593 411   2643   40    31.50
 2:   1999  27271 411   2643   40    31.50
 3:   2000  35758 411   2643   40    31.50
 4:   2001  45047 411   2643   50    31.50
 5:   2002  56291 411   2643   55    31.50
 6:   2003  66991 411   2643   55    31.50
 7:   2004  80581 411   2643   55    31.50
 8:   2005  95058 411     NA   52       NA
 9:   2006 113379 411  10911   50     4.65
10:   2007 120894 411  10911   50     4.65
11:   2008 130033 411   2483   50     8.50
12:   2009 139201 411   2296   46    50.00
13:   2010 148833 411   2296   46    50.00
14:   2011 158395 411   2296   46    50.00
15:   1998  16350 431    435   68   114.00
16:   1998  16353 431    303   68    45.08
17:   1999  28641 431    303   68    45.08
18:   1999  28644 431    435   68   114.00
19:   2000  37491 436   2021   50    19.11
20:   2001  47019 436   2021   50    19.11
21:   2002  57588 436   2021   51    19.11
22:   2003  69128 436   2021   51    19.11
23:   2004  82400 436   2021   52    19.11
24:   2005  95599 436   2021   50    19.11
25:   2006 113126 436   2021   50    19.11
26:   2007 122387 436   2021   50    19.11
27:   2008 131126 436   2021   50    19.11
28:   2009 140417 436   2021   50    19.11
29:   2010 150673 436   2021   50    19.11
30:   2011 159776 436   2021   50    19.11
    season    lic  id vessel qtty grossTon

Merge rows with same names and sum other values in other column rows

We could first group_by country

and then use summarise with across

library(dplyr)
df %>% 
  group_by(country) %>% 
  summarise(across(everything(), sum))

Output:

 country new_persons_vac~ total_persons_v~ new_persons_ful~ total_persons_f~ new_vaccine_dos~ total_vaccine_d~
   <chr>              <dbl>            <dbl>            <dbl>            <dbl>            <dbl>            <dbl>
 1 Afghan~           294056          8452317           163535          2313338           457591         10765655
 2 Albania           601152         27639676           465433         18105836           459226         45745512
 3 Andorra            40569           360995            25838           144358            58535           506402
 4 Angola            371996          9545624           559633          4688357           931629         14233981
 5 Anguil~             3206            73046             6847            48524            10053           121570
 6 Antigu~             5232           770379            26084           485839            31316          1256218
 7 Argent~         65820302       3858592405         16136889        917220373         81957191       4775812778
 8 Armenia           138306           426851            58214           135848           196520           562699
 9 Aruba              55435          4907836            52549          3439184           107984          8347020
10 Austra~         14227655        811027845          5722445        163311327         19238830        974339172
# ... with 183 more rows

head of data:

df <- structure(list(country = c("Brazil", "Brazil", "Brazil", "Brazil", 
"Brazil", "Brazil"), new_persons_vaccinated = c(1, 1, 1, 1, 1, 
1), total_persons_vaccinated = c(1, 1, 1, 2, 1, 1), new_persons_fully_vaccinated = c(0, 
0, 0, 0, 0, 0), total_persons_fully_vaccinated = c(0, 0, 0, 0, 
0, 0), new_vaccine_doses_administered = c(1, 1, 1, 1, 1, 1), 
    total_vaccine_doses_administered = c(1, 1, 1, 2, 1, 1)), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

sum up multiple rows by condition in R

In base you can use rowsum to sum up rows by group.

rowsum(df[-1], df[,1])
#        gene1 gene2
#sample1   399    34
#sample2    80     0
#sample3     0   456

Or using aggregate:

aggregate(.~file, df, sum)
#     file gene1 gene2
#1 sample1   399    34
#2 sample2    80     0
#3 sample3     0   456

Or using by:

do.call(rbind, by(df[-1], df[,1], colSums))
#        gene1 gene2
#sample1   399    34
#sample2    80     0
#sample3     0   456

Merge similar rows and sum their values?

Use GROUP BY and SUM:

SELECT URL, SUM(Art) as Art, SUM(Design) AS Design
FROM yourtable
GROUP BY URL

Sum duplicate rows that are grouped and combine their IDs in R

We could use group_by with summarise to paste (str_c) the 'dive_phase' and sum the 'beats20_max'

library(dplyr)
library(stringr)
df1 %>%
    group_by(seal_ID, diveNum, datetime) %>%
    summarise(dive_phase = str_c(dive_phase, collapse = ""), 
         beats20_max = sum(beats20_max, na.rm = TRUE), .groups = 'drop') %>%
    select(any_of(names(df1)))

-output

# A tibble: 12 × 5
   seal_ID diveNum dive_phase datetime            beats20_max
   <chr>     <int> <chr>      <chr>                     <int>
 1 Baikal       19 D          2019-04-02 14:43:00          12
 2 Baikal       19 D          2019-04-02 14:43:20          14
 3 Baikal       19 D          2019-04-02 14:43:40          15
 4 Baikal       19 D          2019-04-02 14:44:00          15
 5 Baikal       19 D          2019-04-02 14:44:20          14
 6 Baikal       19 D          2019-04-02 14:44:40          13
 7 Baikal       19 D          2019-04-02 14:45:00          15
 8 Baikal       19 D          2019-04-02 14:45:20          15
 9 Baikal       19 D          2019-04-02 14:45:40          15
10 Baikal       19 BD         2019-04-02 14:46:00          16
11 Baikal       19 B          2019-04-02 14:46:20          15
12 Baikal       19 B          2019-04-02 14:46:40          15

data

df1 <- structure(list(seal_ID = c("Baikal", "Baikal", "Baikal", "Baikal", 
"Baikal", "Baikal", "Baikal", "Baikal", "Baikal", "Baikal", "Baikal", 
"Baikal", "Baikal"), diveNum = c(19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L), dive_phase = c("D", "D", 
"D", "D", "D", "D", "D", "D", "D", "B", "D", "B", "B"), 
atetime = c("2019-04-02 14:43:00", 
"2019-04-02 14:43:20", "2019-04-02 14:43:40", "2019-04-02 14:44:00", 
"2019-04-02 14:44:20", "2019-04-02 14:44:40", "2019-04-02 14:45:00", 
"2019-04-02 14:45:20", "2019-04-02 14:45:40", "2019-04-02 14:46:00", 
"2019-04-02 14:46:00", "2019-04-02 14:46:20", "2019-04-02 14:46:40"
), HR_mean = c(38.6, 42.2, 44, 45.5, 42.1, 39.9, 45.5, 44.6, 
45.9, 46.1, 55.8, 47.4, 45.4), HR_max = c(44.8, 48, 54.1, 61.9, 
49.2, 44.1, 54.5, 53.1, 51.7, 51.7, 59.4, 57.1, 53.6), beats20_mean = c(6.5, 
7.5, 8, 8, 7.5, 7, 8, 8, 8, 7.5, 1.5, 8, 8), beats20_max = c(12L, 
14L, 15L, 15L, 14L, 13L, 15L, 15L, 15L, 14L, 2L, 15L, 15L)),
 class = "data.frame", row.names = c("8", 
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", 
"20"))

Combine Rows & Sum Values in a Worksheet

Sort them on all alphabetic columns you deem important.
In an unused column to the right use a formula like the following in the second row,
=IF($A2&$B2&$C2&$D2=$A3&$B3&$C3&$D3, "", SUMIFS(E:E,$A:$A, $A2,$B:$B, $B2,$C:$C, $C2,$D:$D, $D2))
Copy that formula right one column then fill both columns down as far as your data goes
Filter on the two columns, removing blanks.
Optionally copy the data to a new report worksheet and remove columns E & F.

Addendum:

A more automated approach could be achieved with some form of array and some simple mathematical operations. I've chosen a dictionary object in order to take use of its indexed Key to recognize patterns in the first four alphabetic identifiers.

To use a scripting dictionary, you need to go into the VBE's Tools ► References and add Microsoft Scripting Runtime. The following code will not compile without it.

The following has been adjusted for dynamic columns of keys and integers.

Sub rad_collection()
    Dim rw As Long, nc As Long, sTMP As String, v As Long, vTMP As Variant
    Dim i As Long, iNumKeys As Long, iNumInts As Long
    Dim dRADs As New Scripting.Dictionary

    dRADs.CompareMode = vbTextCompare
    iNumKeys = 5    'possibly calculated by num text (see below)
    iNumInts = 2    'possibly calculated by num ints (see below)

    With ThisWorkbook.Sheets("Sheet4").Cells(1, 1).CurrentRegion
        'iNumKeys = Application.CountA(.Rows(2)) - Application.Count(.Rows(2))  'alternate count of txts
        'iNumInts = Application.Count(.Rows(2))    'alternate count of ints
        For rw = 2 To .Cells(Rows.Count, 1).End(xlUp).row
                vTMP = .Cells(rw, 1).Resize(1, iNumKeys).Value2
                sTMP = Join(Application.Index(vTMP, 1, 0), Chr(183))
                If Not dRADs.Exists(sTMP) Then
                    dRADs.Add Key:=sTMP, Item:=Join(Application.Index(.Cells(rw, iNumKeys + 1).Resize(1, iNumInts).Value2, 1, 0), Chr(183))
                Else
                    vTMP = Split(dRADs.Item(sTMP), Chr(183))
                    For v = LBound(vTMP) To UBound(vTMP)
                        vTMP(v) = vTMP(v) + .Cells(rw, iNumKeys + 1 + v).Value2
                    Next v
                    dRADs.Item(sTMP) = Join(vTMP, Chr(183))
                End If

        Next rw

        rw = 1
        nc = iNumKeys + iNumInts + 1
        .Cells(rw, nc + 1).CurrentRegion.ClearContents  'clear previous
        .Cells(rw, nc + 1).Resize(1, nc - 1) = .Cells(rw, 1).Resize(1, nc - 1).Value2
        For Each vTMP In dRADs.Keys
            'Debug.Print vTMP & "|" & dRADs.Item(vTMP)
            rw = rw + 1
            .Cells(rw, nc + 1).Resize(1, iNumKeys) = Split(vTMP, Chr(183))
            .Cells(rw, nc + iNumKeys + 1).Resize(1, iNumInts) = Split(dRADs.Item(vTMP), Chr(183))
            .Cells(rw, nc + iNumKeys + 1).Resize(1, iNumInts) = _
              .Cells(rw, nc + iNumKeys + 1).Resize(1, iNumInts).Value2
        Next vTMP
    End With

    dRADs.RemoveAll: Set dRADs = Nothing

End Sub

Just run the macro against the numbers you have provided as samples. I've assumed some form of column header labels in the first row. The dictionary object is populated and duplicates in the combined identifiers have their numbers summed. All that is left is to split them back up and return them to the worksheet in an unused area.

Rad measurement collection

Location of Microsoft Scripting Runtime - In the Visual Basic Editor (aka VBE) choose Tools ► References (Alt+T,R) and scroll down a little more than halfway to find it.

Microsoft Scripting Runtime

Modifying a sub that combines and sums duplicate rows

Slight modification using a dictionary:

Sub Consolidate()
     
    Application.ScreenUpdating = False
        
    Dim s As Worksheet, last_row As Long
    Dim row As Long, dict As Object, k As String, m As Long
    
    Set dict = CreateObject("scripting.dictionary") 'for tracking A+B vs first row occurence
    Set s = Worksheets("Sheet12")
    s.Activate
    last_row = s.Cells(s.Rows.Count, 1).End(xlUp).row 'find the last row with data
        
    'map all the A+B combinations to the first row they occur on
    For row = 3 To last_row
        k = s.Cells(row, "A").Value & "~~" & s.Cells(row, "B").Value
        If Not dict.exists(k) Then dict.Add k, row
    Next row
        
    For row = last_row To 3 Step -1
        k = s.Cells(row, "A").Value & "~~" & s.Cells(row, "B").Value
        m = dict(k)                                 'find first match to this row from the dictionary
        If m < row Then                             'earlier row?
            'combine rows `row` and `m`
            s.Cells(m, "C").Value = s.Cells(m, "C").Value + s.Cells(row, "C").Value
            s.Cells(m, "D").Value = s.Cells(m, "D").Value + s.Cells(row, "D").Value
            
            s.Rows(row).Delete
        End If 'matched a different row
    Next row
End Sub

Combine Rows and Sum Their Values