Assign Value to Group Based on Condition in Column

Assign value to group based on condition in column

Here's a quick data.table solution:

library(data.table)
# Convert df to a data.table by reference, then give every row of a group the
# date of that group's row whose value is 4.
setDT(df)
df[, newValue := date[value == 4L], by = "group"]
df
# group date value newValue
# 1: 1 1 3 2
# 2: 1 2 4 2
# 3: 1 3 3 2
# 4: 2 4 4 4
# 5: 2 5 5 4
# 6: 2 6 6 4
# 7: 3 7 6 8
# 8: 3 8 4 8
# 9: 3 9 9 8

Here's a similar dplyr version

library(dplyr)
# For each group, copy the date of the first row whose value is 4 onto every
# row of that group. match() always yields exactly one index (NA when the
# group has no such row), so mutate() receives a length-1 value to recycle
# instead of failing on a zero-row or multi-row subset.
df %>%
  group_by(group) %>%
  mutate(newValue = date[match(4L, value)]) %>%
  # Drop the grouping so later verbs do not silently operate per group
  ungroup()

Or a possible base R solution using merge after filtering the data (will need some renaming afterwards)

# Keep only the rows where value is 4; their date is the one to attach per group
value_four <- df[df$value == 4, c("group", "date")]
# Join on group (merge() keeps matching groups only). Both inputs have a date
# column, so the result carries date.x (original) and date.y (looked up).
merge(df, value_four, by = "group")

Groupby specific column then assign new values based on conditions

I'm sure there's a nicer way, but here's one solution:

def validate(s):
    """Classify a collection of values by its number of distinct entries.

    Returns "Invalid" when ``s`` holds more than two distinct values,
    otherwise "Valid".
    """
    return "Invalid" if len(set(s)) > 2 else "Valid"


# One "Valid"/"Invalid" verdict per Tag, indexed by the Tag value.
mapping = df.groupby("Tag")["M_Name"].apply(validate)
# Look each row's Tag up in that per-group result. map() performs an index
# lookup, whereas replace() treats the Series as a set of value substitutions.
df["Tag_2"] = df["Tag"].map(mapping)

Output:

     Name M_Name  Tag    Tag_2
0 John 1 Valid
1 Dave a 1 Valid
2 Mary 1 Valid
3 Sam 1 Valid
4 Chris a 1 Valid
5 John 2 Invalid
6 Nola f 2 Invalid
7 Chuck 2 Invalid
8 Rob 2 Invalid
9 Chris a 2 Invalid
10 Angie 3 Valid
11 Joe 3 Valid

Assign value to other group members with conditions in a complex data structure

You may use match here -

library(dplyr)

# Within each Group/Stage/Period combination, Stage 2 rows take the Value of
# the first row whose Role is 1 (match() returns that row's position, or NA
# when the group has no such row); all other rows keep their own Value.
df %>%
group_by(Group, Stage, Period) %>%
mutate(NewValue = ifelse(Stage == 2, Value[match(1, Role)], Value)) %>%
ungroup()

# Group Stage Period Role Value NewValue
# <int> <int> <int> <int> <dbl> <dbl>
# 1 1 1 1 NA 10.8 10.8
# 2 1 1 1 NA 15.5 15.5
# 3 1 1 1 NA 7.4 7.4
# 4 1 2 1 0 3.7 6.7
# 5 1 2 1 1 6.7 6.7
# 6 1 2 1 0 7.5 6.7
# 7 1 2 2 0 15.1 5.1
# 8 1 2 2 0 8.2 5.1
# 9 1 2 2 1 5.1 5.1
#10 2 1 1 NA 17.3 17.3
#11 2 1 1 NA 14.9 14.9
#12 2 1 1 NA 11.1 11.1
#13 2 2 1 0 13.3 3.6
#14 2 2 1 1 3.6 3.6
#15 2 2 1 0 7.2 3.6
#16 2 2 2 0 13.1 11
#17 2 2 2 0 16.5 11
#18 2 2 2 1 11 11

Make a new column based on group by conditionally in Python

Almost there. Change filter to transform and use a condition:

def _label_id(groups):
    # An id with exactly two distinct groups is relabelled 'two'; any other
    # id keeps its original group values.
    if groups.nunique() == 2:
        return 'two'
    return groups


df['new_group'] = df.groupby("id")["group"].transform(_label_id)
print(df)

# Output:
id group new_group
0 x1 A two
1 x1 B two
2 x2 A A
3 x2 A A
4 x3 B B

In Pandas with Groupby: assign a value from a column conditioned on another column

Another option is to use transform with idxmin and loc:

def _col2_at_min_col1(col1_values):
    # idxmin() gives the index label of the group's smallest col1; that label
    # is then used to read col2 from the full frame.
    return df.loc[col1_values.idxmin(), "col2"]


df["col3"] = df.groupby("col0")["col1"].transform(_col2_at_min_col1)

to get

  col0  col1 col2 col3
0 a 0 w w
1 a 1 x w
2 b 2 y y
3 b 3 z y

R - Set values by group based on a condition in a dataframe

strsplit the overlaps column, subset to just those where data$value > 10, then use that distinct set of row_names to overwrite the original data with 0:

# Rows whose value exceeds 10 are the ones whose overlaps get zeroed
gr10 <- data$value > 10
# Split every overlaps string into row names, keeping the flagged rows only
overlap_names <- strsplit(data$overlaps, ",\\s+")[gr10]
# Prefix each overlapping row name with its group to form "group|row_name" keys
sel <- Map(paste, data$group[gr10], overlap_names, sep = "|")
# Merge the per-row key vectors into a single vector of keys
sel <- Reduce(union, sel)
sel
#[1] "group_a|4" "group_a|5" "group_a|" "group_b|7"
# Zero out every row whose own "group|row_name" key appears among those keys
row_keys <- paste(data$group, data$row_name, sep = "|")
data$value[row_keys %in% sel] <- 0
data
# group value row_name overlaps
#1 group_a 4 1 2
#2 group_a 5 2 3, 5
#3 group_a 48 3 4, 5
#4 group_a 0 4 5
#5 group_a 0 5
#6 group_b 12 6 7
#7 group_b 0 7

If the row_names are unique across the entire dataset, you can use simpler logic:

# Row names listed in the overlaps of any row whose value exceeds 10
flagged_overlaps <- strsplit(data$overlaps, ",\\s+")[data$value > 10]
sel <- Reduce(union, flagged_overlaps)
sel
#[1] "4" "5" "7"
# Matching on row_name alone relies on row_name being unique across all groups
is_overlapped <- data$row_name %in% sel
data$value[is_overlapped] <- 0

Bonus data.table solution:

library(data.table)
setDT(data)

# One (group, row_name) pair for every overlap listed on a row with value > 10
overlap_keys <- data[
  value > 10,
  .(row_name = unlist(strsplit(overlaps, ",\\s+"))),
  by = group
]

# Update join: set value to 0, by reference, on the rows of data that match
data[overlap_keys, on = .(group, row_name), value := 0]

R - select and assign value to group based on condition in column

We can use dplyr. After grouping by 'year', get the 'pos' where 'days' is at its maximum (which.max(days)), as well as the sums of 'days' and 'sal'.

library(dplyr)
# Per year: pos is taken from the row with the largest days, then days and sal
# are summed. pos has to come first, because later arguments of summarise()
# see the already-summed days rather than the original column.
d %>%
group_by(year) %>%
summarise(pos = pos[which.max(days)], days = sum(days), sal = sum(sal))
# # A tibble: 2 × 4
# year pos days sal
# <int> <chr> <int> <int>
#1 2009 B 101 6600
#2 2010 D 100 8000


Related Topics



Leave a reply



Submit