Assign Value to Group Based on Condition in Column

Assign value to group based on condition in column

Here's a quick data.table one

library(data.table)
# Per group, copy the `date` of the first row whose `value` is 4 into every
# row of that group. match() yields exactly one position (or NA), so the
# right-hand side is a single value even when a group has no match, several
# matches, or NA in `value`.
setDT(df)[, newValue := date[match(4L, value)], by = group]
df
#    group date value newValue
# 1:     1    1     3        2
# 2:     1    2     4        2
# 3:     1    3     3        2
# 4:     2    4     4        4
# 5:     2    5     5        4
# 6:     2    6     6        4
# 7:     3    7     6        8
# 8:     3    8     4        8
# 9:     3    9     9        8

Here's a similar dplyr version

library(dplyr)
# Per group, broadcast the `date` of the first row whose `value` is 4.
# match() always yields one position (NA when the group has no such row), so
# mutate() receives a length-1 value it can recycle across the group.
df %>%
  group_by(group) %>%
  mutate(newValue = date[match(4L, value)]) %>%
  # Drop the grouping so later verbs do not silently run per group.
  ungroup()

Or a possible base R solution using merge after filtering the data (will need some renaming afterwards)

# Look up each group's `date` where `value` is 4, then join it back on `group`.
# which() keeps NA comparisons from producing all-NA lookup rows, and
# all.x = TRUE keeps groups that have no value-4 row (their looked-up date
# is NA) instead of dropping them from the result.
merge(
  df,
  df[which(df$value == 4), c("group", "date")],
  by = "group",
  all.x = TRUE
)

Group by a specific column, then assign new values based on conditions

I'm sure there's a nicer way, but here's one solution:

def validate(s):
    """Label a collection by how many distinct values it holds.

    Returns "Invalid" when `s` contains more than two distinct values,
    otherwise "Valid".
    """
    distinct_values = set(s)
    if len(distinct_values) > 2:
        return "Invalid"
    return "Valid"


# One label per Tag, computed from that Tag's M_Name values.
mapping = df.groupby("Tag")["M_Name"].apply(validate)
# Look each row's Tag up in the per-Tag labels. map() is the direct
# index-lookup idiom; replace() is meant for value substitution.
df["Tag_2"] = df["Tag"].map(mapping)

Output:

     Name M_Name  Tag    Tag_2
0    John           1    Valid
1    Dave      a    1    Valid
2    Mary           1    Valid
3     Sam           1    Valid
4   Chris      a    1    Valid
5    John           2  Invalid
6    Nola      f    2  Invalid
7   Chuck           2  Invalid
8     Rob           2  Invalid
9   Chris      a    2  Invalid
10  Angie           3    Valid
11    Joe           3    Valid

Assign value to other group members with conditions in a complex data structure

You may use match here -

library(dplyr)

# Within each Group/Stage/Period cell, Stage-2 rows take the Value of the
# first row whose Role is 1 (NA if there is none); rows of other stages keep
# their own Value.
by_cell <- group_by(df, Group, Stage, Period)
filled <- mutate(
  by_cell,
  NewValue = ifelse(Stage == 2, Value[match(1, Role)], Value)
)
ungroup(filled)

#   Group Stage Period  Role Value NewValue
#   <int> <int>  <int> <int> <dbl>    <dbl>
# 1     1     1      1    NA  10.8     10.8
# 2     1     1      1    NA  15.5     15.5
# 3     1     1      1    NA   7.4      7.4
# 4     1     2      1     0   3.7      6.7
# 5     1     2      1     1   6.7      6.7
# 6     1     2      1     0   7.5      6.7
# 7     1     2      2     0  15.1      5.1
# 8     1     2      2     0   8.2      5.1
# 9     1     2      2     1   5.1      5.1
#10     2     1      1    NA  17.3     17.3
#11     2     1      1    NA  14.9     14.9
#12     2     1      1    NA  11.1     11.1
#13     2     2      1     0  13.3      3.6
#14     2     2      1     1   3.6      3.6
#15     2     2      1     0   7.2      3.6
#16     2     2      2     0  13.1     11  
#17     2     2      2     0  16.5     11  
#18     2     2      2     1  11       11

Make a new column based on group by conditionally in Python

Almost there. Change filter to transform and use a condition:

# Per id: if the id's `group` column holds exactly two distinct values, label
# every row of that id "two"; otherwise keep each row's original group.
groups_by_id = df.groupby("id")["group"]
df["new_group"] = groups_by_id.transform(
    lambda grp: "two" if grp.nunique() == 2 else grp
)
print(df)

# Output:
   id group new_group
0  x1     A       two
1  x1     B       two
2  x2     A         A
3  x2     A         A
4  x3     B         B

In Pandas with Groupby: assign a value from a column conditioned on another column

Another option is to use transform together with idxmin and loc:

# For each col0 group, find the row label where col1 is smallest and
# broadcast that row's col2 value to every row of the group.
col1_by_group = df.groupby("col0")["col1"]
df["col3"] = col1_by_group.transform(
    lambda grp: df.loc[grp.idxmin(), "col2"]
)

to get

  col0  col1 col2 col3
0    a     0    w    w
1    a     1    x    w
2    b     2    y    y
3    b     3    z    y

R - Set values by group based on a condition in a dataframe

strsplit the overlaps column, subset to just those where data$value > 10, then use that distinct set of row_names to overwrite the original data with 0:

# Rows whose value exceeds 10 decide which other rows get zeroed.
gr10 <- data$value > 10
# Build "group|row_name" keys from the overlaps of those rows. ",\\s*" also
# splits entries written without a space after the comma (e.g. "3,5").
sel <- Map(
  paste,
  data$group[gr10],
  strsplit(data$overlaps, ",\\s*")[gr10],
  sep = "|"
)
sel <- Reduce(union, sel)
sel
#[1] "group_a|4" "group_a|5" "group_a|"  "group_b|7"
# Zero every row whose own "group|row_name" key is among the selected keys.
data$value[paste(data$group, data$row_name, sep = "|") %in% sel] <- 0
data
#    group value row_name overlaps
#1 group_a     4        1        2
#2 group_a     5        2     3, 5
#3 group_a    48        3     4, 5
#4 group_a     0        4        5
#5 group_a     0        5         
#6 group_b    12        6        7
#7 group_b     0        7

If the row_names are unique across the entire dataset, you can use simpler logic:

# Collect the distinct row_names listed in `overlaps` by rows whose value
# exceeds 10. ",\\s*" also splits entries written without a space after the
# comma (e.g. "3,5").
overlap_ids <- strsplit(data$overlaps, ",\\s*")
sel <- Reduce(union, overlap_ids[data$value > 10])
sel
#[1] "4" "5" "7"
# Zero every row whose row_name was listed.
data$value[data$row_name %in% sel] <- 0

Bonus data.table solution:

library(data.table)
# Convert `data` to a data.table so `:=` can be used on it.
setDT(data)

# Inner query: for rows with value > 10, expand `overlaps` into one row per
# listed row_name, by group. ",\\s*" also splits entries written without a
# space after the comma (e.g. "3,5").
# Outer query: join on (group, row_name) and set value to 0 for the matches.
# NOTE(review): strsplit() yields character row_names - confirm that
# data$row_name is stored as character so the join key types agree.
data[
  data[
    value > 10,
    .(row_name = unlist(strsplit(overlaps, ",\\s*"))),
    by = group
  ],
  on = .(group, row_name),
  value := 0
]

R - select and assign value to group based on condition in column

We can use dplyr. After grouping by 'year', take the 'pos' from the row where 'days' is largest (which.max(days)), and also compute the sums of 'days' and 'sal'.

library(dplyr)
# One row per year: the `pos` of the row with the most days, plus the yearly
# totals of `days` and `sal`.
by_year <- group_by(d, year)
summarise(
  by_year,
  # `pos` comes first on purpose: it must read the original `days` column
  # before the next argument replaces `days` with its per-year sum.
  pos = pos[which.max(days)],
  days = sum(days),
  sal = sum(sal)
)
# # A tibble: 2 × 4
#   year   pos  days   sal
#  <int> <chr> <int> <int>
#1  2009     B   101  6600
#2  2010     D   100  8000

Assign Value to Group Based on Condition in Column