Combine Rows and Sum Their Values

Combine rows and sum their values

If you turn your data.frame into a data.table you can make great use of the by argument

library(data.table)

DT <- data.table(DF) # DF is your original data

then it is simply one line:

DT[, lapply(.SD, sum), by=list(season, lic, id, vessel)]

We can filter just the 1998 Season, if we'd like: '

DT[, lapply(.SD, sum), by=list(season, lic, id, vessel)][season==1998]
season lic id vessel qtty grossTon
1: 1998 15593 411 2643 40 31.50
2: 1998 16350 431 435 68 114.00
3: 1998 16353 431 303 68 45.08

The entire result output looks like this:

    season    lic  id vessel qtty grossTon
1: 1998 15593 411 2643 40 31.50
2: 1999 27271 411 2643 40 31.50
3: 2000 35758 411 2643 40 31.50
4: 2001 45047 411 2643 50 31.50
5: 2002 56291 411 2643 55 31.50
6: 2003 66991 411 2643 55 31.50
7: 2004 80581 411 2643 55 31.50
8: 2005 95058 411 NA 52 NA
9: 2006 113379 411 10911 50 4.65
10: 2007 120894 411 10911 50 4.65
11: 2008 130033 411 2483 50 8.50
12: 2009 139201 411 2296 46 50.00
13: 2010 148833 411 2296 46 50.00
14: 2011 158395 411 2296 46 50.00
15: 1998 16350 431 435 68 114.00
16: 1998 16353 431 303 68 45.08
17: 1999 28641 431 303 68 45.08
18: 1999 28644 431 435 68 114.00
19: 2000 37491 436 2021 50 19.11
20: 2001 47019 436 2021 50 19.11
21: 2002 57588 436 2021 51 19.11
22: 2003 69128 436 2021 51 19.11
23: 2004 82400 436 2021 52 19.11
24: 2005 95599 436 2021 50 19.11
25: 2006 113126 436 2021 50 19.11
26: 2007 122387 436 2021 50 19.11
27: 2008 131126 436 2021 50 19.11
28: 2009 140417 436 2021 50 19.11
29: 2010 150673 436 2021 50 19.11
30: 2011 159776 436 2021 50 19.11
season lic id vessel qtty grossTon

Merge rows with same names and sum other values in other column rows

We could first group_by country

and then use summarise with across

library(dplyr)
df %>%
group_by(country) %>%
summarise(across(everything(), sum))

Output:

 country new_persons_vac~ total_persons_v~ new_persons_ful~ total_persons_f~ new_vaccine_dos~ total_vaccine_d~
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Afghan~ 294056 8452317 163535 2313338 457591 10765655
2 Albania 601152 27639676 465433 18105836 459226 45745512
3 Andorra 40569 360995 25838 144358 58535 506402
4 Angola 371996 9545624 559633 4688357 931629 14233981
5 Anguil~ 3206 73046 6847 48524 10053 121570
6 Antigu~ 5232 770379 26084 485839 31316 1256218
7 Argent~ 65820302 3858592405 16136889 917220373 81957191 4775812778
8 Armenia 138306 426851 58214 135848 196520 562699
9 Aruba 55435 4907836 52549 3439184 107984 8347020
10 Austra~ 14227655 811027845 5722445 163311327 19238830 974339172
# ... with 183 more rows

head of data:

df <- structure(list(country = c("Brazil", "Brazil", "Brazil", "Brazil", 
"Brazil", "Brazil"), new_persons_vaccinated = c(1, 1, 1, 1, 1,
1), total_persons_vaccinated = c(1, 1, 1, 2, 1, 1), new_persons_fully_vaccinated = c(0,
0, 0, 0, 0, 0), total_persons_fully_vaccinated = c(0, 0, 0, 0,
0, 0), new_vaccine_doses_administered = c(1, 1, 1, 1, 1, 1),
total_vaccine_doses_administered = c(1, 1, 1, 2, 1, 1)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))

sum up multiple rows by condition in R

In base you can use rowsum to sum up rows by group.

rowsum(df[-1], df[,1])
# gene1 gene2
#sample1 399 34
#sample2 80 0
#sample3 0 456

Or using aggregate:

aggregate(.~file, df, sum)
# file gene1 gene2
#1 sample1 399 34
#2 sample2 80 0
#3 sample3 0 456

Or using by:

do.call(rbind, by(df[-1], df[,1], colSums))
# gene1 gene2
#sample1 399 34
#sample2 80 0
#sample3 0 456

Merge similar rows and sum their values?

Use GROUP BY and SUM:

SELECT URL, SUM(Art) as Art, SUM(Design) AS Design
FROM yourtable
GROUP BY URL

Sum duplicate rows that are grouped and combine their IDs in R

We could use group_by with summarise to paste (str_c) the 'dive_phase' and sum the 'beats20_max'

library(dplyr)
library(stringr)
df1 %>%
group_by(seal_ID, diveNum, datetime) %>%
summarise(dive_phase = str_c(dive_phase, collapse = ""),
beats20_max = sum(beats20_max, na.rm = TRUE), .groups = 'drop') %>%
select(any_of(names(df1)))

-output

# A tibble: 12 × 5
seal_ID diveNum dive_phase datetime beats20_max
<chr> <int> <chr> <chr> <int>
1 Baikal 19 D 2019-04-02 14:43:00 12
2 Baikal 19 D 2019-04-02 14:43:20 14
3 Baikal 19 D 2019-04-02 14:43:40 15
4 Baikal 19 D 2019-04-02 14:44:00 15
5 Baikal 19 D 2019-04-02 14:44:20 14
6 Baikal 19 D 2019-04-02 14:44:40 13
7 Baikal 19 D 2019-04-02 14:45:00 15
8 Baikal 19 D 2019-04-02 14:45:20 15
9 Baikal 19 D 2019-04-02 14:45:40 15
10 Baikal 19 BD 2019-04-02 14:46:00 16
11 Baikal 19 B 2019-04-02 14:46:20 15
12 Baikal 19 B 2019-04-02 14:46:40 15

data

df1 <- structure(list(seal_ID = c("Baikal", "Baikal", "Baikal", "Baikal", 
"Baikal", "Baikal", "Baikal", "Baikal", "Baikal", "Baikal", "Baikal",
"Baikal", "Baikal"), diveNum = c(19L, 19L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 19L, 19L), dive_phase = c("D", "D",
"D", "D", "D", "D", "D", "D", "D", "B", "D", "B", "B"),
atetime = c("2019-04-02 14:43:00",
"2019-04-02 14:43:20", "2019-04-02 14:43:40", "2019-04-02 14:44:00",
"2019-04-02 14:44:20", "2019-04-02 14:44:40", "2019-04-02 14:45:00",
"2019-04-02 14:45:20", "2019-04-02 14:45:40", "2019-04-02 14:46:00",
"2019-04-02 14:46:00", "2019-04-02 14:46:20", "2019-04-02 14:46:40"
), HR_mean = c(38.6, 42.2, 44, 45.5, 42.1, 39.9, 45.5, 44.6,
45.9, 46.1, 55.8, 47.4, 45.4), HR_max = c(44.8, 48, 54.1, 61.9,
49.2, 44.1, 54.5, 53.1, 51.7, 51.7, 59.4, 57.1, 53.6), beats20_mean = c(6.5,
7.5, 8, 8, 7.5, 7, 8, 8, 8, 7.5, 1.5, 8, 8), beats20_max = c(12L,
14L, 15L, 15L, 14L, 13L, 15L, 15L, 15L, 14L, 2L, 15L, 15L)),
class = "data.frame", row.names = c("8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20"))

Combine Rows & Sum Values in a Worksheet

  1. Sort them on all alphabetic columns you deem important.
  2. In an unused column to the right use a formula like the following in the second row,

    =IF($A2&$B2&$C2&$D2=$A3&$B3&$C3&$D3, "", SUMIFS(E:E,$A:$A, $A2,$B:$B, $B2,$C:$C, $C2,$D:$D, $D2))

  3. Copy that formula right one column then fill both columns down as far as your data goes

  4. Filter on the two columns, removing blanks.

            radiations measurements from a PRM-9000

  5. Optionally copy the data to a new report worksheet and remove columns E & F.

Addendum:

A more automated approach could be achieved with some form of array and some simple mathematical operations. I've chosen a dictionary object in order to take use of its indexed Key to recognize patterns in the first four alphabetic identifiers.

To use a scripting dictionary, you need to go into the VBE's Tools ► References and add Microsoft Scripting Runtime. The following code will not compile without it.

The following has been adjusted for dynamic columns of keys and integers.

Sub rad_collection()
Dim rw As Long, nc As Long, sTMP As String, v As Long, vTMP As Variant
Dim i As Long, iNumKeys As Long, iNumInts As Long
Dim dRADs As New Scripting.Dictionary

dRADs.CompareMode = vbTextCompare
iNumKeys = 5 'possibly calculated by num text (see below)
iNumInts = 2 'possibly calculated by num ints (see below)

With ThisWorkbook.Sheets("Sheet4").Cells(1, 1).CurrentRegion
'iNumKeys = Application.CountA(.Rows(2)) - Application.Count(.Rows(2)) 'alternate count of txts
'iNumInts = Application.Count(.Rows(2)) 'alternate count of ints
For rw = 2 To .Cells(Rows.Count, 1).End(xlUp).row
vTMP = .Cells(rw, 1).Resize(1, iNumKeys).Value2
sTMP = Join(Application.Index(vTMP, 1, 0), Chr(183))
If Not dRADs.Exists(sTMP) Then
dRADs.Add Key:=sTMP, Item:=Join(Application.Index(.Cells(rw, iNumKeys + 1).Resize(1, iNumInts).Value2, 1, 0), Chr(183))
Else
vTMP = Split(dRADs.Item(sTMP), Chr(183))
For v = LBound(vTMP) To UBound(vTMP)
vTMP(v) = vTMP(v) + .Cells(rw, iNumKeys + 1 + v).Value2
Next v
dRADs.Item(sTMP) = Join(vTMP, Chr(183))
End If

Next rw

rw = 1
nc = iNumKeys + iNumInts + 1
.Cells(rw, nc + 1).CurrentRegion.ClearContents 'clear previous
.Cells(rw, nc + 1).Resize(1, nc - 1) = .Cells(rw, 1).Resize(1, nc - 1).Value2
For Each vTMP In dRADs.Keys
'Debug.Print vTMP & "|" & dRADs.Item(vTMP)
rw = rw + 1
.Cells(rw, nc + 1).Resize(1, iNumKeys) = Split(vTMP, Chr(183))
.Cells(rw, nc + iNumKeys + 1).Resize(1, iNumInts) = Split(dRADs.Item(vTMP), Chr(183))
.Cells(rw, nc + iNumKeys + 1).Resize(1, iNumInts) = _
.Cells(rw, nc + iNumKeys + 1).Resize(1, iNumInts).Value2
Next vTMP
End With

dRADs.RemoveAll: Set dRADs = Nothing

End Sub

Just run the macro against the numbers you have provided as samples. I've assumed some form of column header labels in the first row. The dictionary object is populated and duplicates in the combined identifiers have their numbers summed. All that is left is to split them back up and return them to the worksheet in an unused area.

    Rad measurement collection

Location of Microsoft Scripting Runtime - In the Visual Basic Editor (aka VBE) choose Tools ► References (Alt+T,R) and scroll down a little more than halfway to find it.

        Microsoft Scripting Runtime

Modifying a sub that combines and sums duplicate rows

Slight modification using a dictionary:

Sub Consolidate()

Application.ScreenUpdating = False

Dim s As Worksheet, last_row As Long
Dim row As Long, dict As Object, k As String, m As Long

Set dict = CreateObject("scripting.dictionary") 'for tracking A+B vs first row occurence
Set s = Worksheets("Sheet12")
s.Activate
last_row = s.Cells(s.Rows.Count, 1).End(xlUp).row 'find the last row with data

'map all the A+B combinations to the first row they occur on
For row = 3 To last_row
k = s.Cells(row, "A").Value & "~~" & s.Cells(row, "B").Value
If Not dict.exists(k) Then dict.Add k, row
Next row

For row = last_row To 3 Step -1
k = s.Cells(row, "A").Value & "~~" & s.Cells(row, "B").Value
m = dict(k) 'find first match to this row from the dictionary
If m < row Then 'earlier row?
'combine rows `row` and `m`
s.Cells(m, "C").Value = s.Cells(m, "C").Value + s.Cells(row, "C").Value
s.Cells(m, "D").Value = s.Cells(m, "D").Value + s.Cells(row, "D").Value

s.Rows(row).Delete
End If 'matched a different row
Next row
End Sub


Related Topics



Leave a reply



Submit