Collapse All Columns by an Id Column

Collapse all columns by an ID column

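Here's an option that applies toString to every column except the grouping variable. It was originally written with summarise_each(funs(...)), which makes it easy to apply a function to all non-grouping columns but is deprecated in current dplyr; summarise(across(everything(), ...)) is the modern equivalent: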

# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the pipeline fail later with a confusing message.
library(dplyr)

# Collapse every non-grouping column into one comma-separated string per ID.
# summarise(across(everything(), ...)) replaces the deprecated
# summarise_each(funs(...)) and yields the same columns.
have %>%
  group_by(ID) %>%
  summarise(across(everything(), toString))

#Source: local data frame [5 x 3]
#
# ID info1 info2
#1 id101 one first
#2 id102 twoA, twoB second alias A, second alias B
#3 id103 threeA, threeB third alias A, third alias B
#4 id104 four fourth
#5 id105 five fifth

Or, if you want it separated by semicolons, you can use:

# Same collapse as above, but join the values of each ID with "; ".
# across() with a lambda replaces the deprecated summarise_each(funs(...)).
have %>%
  group_by(ID) %>%
  summarise(across(everything(), ~ paste(.x, collapse = "; ")))

Pandas: collapse rows with same column ID and concatenate multiple columns into 1

First concatenate the fruits and qty columns into one string per row, then aggregate those strings per ID with join, and finally prepend the ID (converted to a string):

# Build a "fruit(qty)" label for every row.
qty_text = df['qty'].astype(str)
df['description'] = df['fruits'] + '(' + qty_text + ')'

# Join the labels of each ID with single spaces, then prefix the ID itself.
labels_by_id = df.groupby('ID')['description']
df = labels_by_id.agg(lambda labels: ' '.join(labels)).reset_index()
id_text = df['ID'].astype(str)
df['description'] = id_text + ': ' + df['description']
print(df)
ID description
0 101 101: oranges(1)
1 134 134: apples(2) pears(3)
2 576 576: pears(3) oranges(2)
3 837 837: apples(1)

Another idea with GroupBy.apply:

def f(x):
    """Return "<ID>: fruit(qty) fruit(qty) ..." for one ID group.

    x is the sub-frame handed over by GroupBy.apply; its ``name``
    attribute holds the key of the group being processed.
    """
    d = x['fruits'] + '(' + x['qty'].astype(str) + ')'
    # str() works for any key type. ``.astype`` exists only on NumPy
    # scalars, so the previous form raised AttributeError for string IDs.
    return str(x.name) + ': ' + ' '.join(d)

# Run f once per ID group; reset_index(name=...) then turns the per-ID
# results into a frame with an 'ID' column and a 'description' column.
df = df.groupby('ID').apply(f).reset_index(name='description')

print (df)
ID description
0 101 101: oranges(1)
1 134 134: apples(2) pears(3)
2 576 576: pears(3) oranges(2)
3 837 837: apples(1)

Collapse multiple columns into two columns using column name as ID

You can use melt from the reshape2 package:

library(reshape2)
# Stack the columns of df into long format: the source column name is stored
# in "DV" and the cell value in "IV". No id.vars are given, so melt() chooses
# them itself -- presumably none for the all-numeric df shown below.
melt(df, variable.name = "DV", value.name = "IV")

DV IV
1 A 1
2 A 5
3 B 2
4 B 6
5 C 3
6 C 7
7 D 4
8 D 8

Collapse rows with same identifier and columns and retain all values in r

You can use tidyr::fill to fill the NA, and only keep the non-duplicated rows using distinct.

library(dplyr)
library(tidyr)

# Within each store/manager pair, copy known fruit/vegetable values into the
# NA cells ("updown": fill upwards first, then downwards) ...
filled <- fill(
  group_by(df, store, manager),
  fruit, vegetable,
  .direction = "updown"
)
# ... and keep only one copy of rows that are now exact duplicates.
distinct(filled)

# A tibble: 5 × 5
# Groups: store, manager [3]
id store manager fruit vegetable
<int> <chr> <chr> <chr> <chr>
1 1 Grocery1 Joe apple zucchini
2 1 Grocery1 Joe lemon zucchini
3 2 Grocery2 Amy orange asparagus
4 2 Grocery2 Amy orange spinach
5 3 Grocery3 Bill NA NA

Pyspark - collapse all columns in dataframe by group variable

You can use dtypes to classify the columns by type: group by the string and date columns, and aggregate (sum) the remaining numeric columns.

# Split the columns by declared type: string/date columns become the
# grouping keys, every other column is summed under its own name.
key_types = ('string', 'date')
group_cols = [name for name, dtype in df.dtypes if dtype in key_types]
sum_cols = [name for name, dtype in df.dtypes if dtype not in key_types]

df = df.groupBy(*group_cols).agg(*[F.sum(name).alias(name) for name in sum_cols])
df.printSchema()
df.show(truncate=False)

Collapse columns by ID and paste value only if they differ

Simply adding unique() to your paste function works -

# For each Uploaded_variation, paste only the distinct values of every other
# column, separated by "; ". across() with a lambda replaces the deprecated
# summarise_all(funs(...)) and keeps the original column names.
x <- df %>%
  group_by(Uploaded_variation) %>%
  summarise(across(everything(), ~ paste(unique(.x), collapse = "; ")))

# showing just one column
x$Location
[1] "1:113905767-113905767" "1:50380360-50380381" "1:87691240-87691240"
[4] "1:18480845-18480845" "1:10506158-10506158"

r aggregate or collapse specific column values by id

We can group by 'ID', 'Brand' and paste the other columns

library(dplyr)

# For every ID/Brand pair, squash each remaining column into a single
# comma-separated string.
summarise(
  group_by(df1, ID, Brand),
  across(everything(), toString)
)

For each ID, separate groups into columns and collapse multiple value strings in R

We can collapse val by ID and DB and then use pivot_wider.

library(dplyr)

# One row per ID/DB pair, with that pair's values joined by ";" ...
collapsed <- summarise(
  group_by(in.dat, ID, DB),
  val = paste0(val, collapse = ";")
)
# ... then spread the DB values out into one column each.
tidyr::pivot_wider(collapsed, names_from = DB, values_from = val)

# ID bio func loc
# <fct> <chr> <chr> <chr>
#1 A1 IPR1;IPR2 s43 333-456
#2 B1 IPR7;IPR8 q87 566-900

Collapse rows with complementary column data in a data.table in r

Maybe this will help :

library(dplyr)

# First element of a column that is not equal to zero (NA if there is none).
first_nonzero <- function(col) {
  col[col != 0][1]
}

df.in %>%
  group_by(tkr) %>%
  summarise(across(lboq:ap, first_nonzero))

# tkr lboq locq ap
#* <chr> <dbl> <dbl> <dbl>
#1 abc 296 -296 134
#2 def -390 390 23
#3 ghi -88 88 17

For each tkr this selects the 1st non-zero value in columns lboq:ap.

r collapsing data from multiple columns into one

# Per row: keep the entries that are neither NA nor "No" and join them
# with ", ".
keep_and_join <- function(x) {
  wanted <- !is.na(x) & x != "No"
  paste(x[wanted], collapse = ", ")
}
df$variable_7 <- apply(df, 1, keep_and_join)
df
# variable_1 variable_2 variable_3 variable_4 variable_5 variable_6
#1 Var1 Var2 <NA> <NA> <NA> <NA>
#2 <NA> No <NA> Var4 No <NA>
#3 <NA> <NA> Var3 <NA> Var5 Var6
#4 Var1 <NA> <NA> <NA> <NA> <NA>
# variable_7
#1 Var1, Var2
#2 Var4
#3 Var3, Var5, Var6
#4 Var1

Explanation: Use apply and paste(..., collapse = ", ") to concatenate all row entries (except NAs and "No"s) and store in new column variable_7.


Sample data

# Sample data: six text columns with scattered NA and "No" entries.
# The columns are passed to data.frame() directly; wrapping them in cbind()
# first would push everything through a character matrix, which silently
# stringifies any non-character column added later.
df <- data.frame(
  variable_1 = c("Var1", NA, NA, "Var1"),
  variable_2 = c("Var2", "No", NA, NA),
  variable_3 = c(NA, NA, "Var3", NA),
  variable_4 = c(NA, "Var4", NA, NA),
  variable_5 = c(NA, "No", "Var5", NA),
  variable_6 = c(NA, NA, "Var6", NA)
)


Related Topics



Leave a reply



Submit