Correctly Specifying "Logical Conditions" (In R)

Correctly Specifying Logical Conditions (in R)

UPDATE:

I think I was able to resolve this problem - now the "logical conditions" are respected in the final output:

#load libraries
library(dplyr)
library(mco)

#define function

funct_set <- function (x) {
    x1 <- x[1]; x2 <- x[2]; x3 <- x[3] ; x4 <- x[4]; x5 <- x[5]; x6 <- x[6]; x[7] <- x[7]
    f <- numeric(4)
    
    
    #bin data according to random criteria
    train_data <- train_data %>%
        mutate(cat = ifelse(a1 <= x1 & b1 <= x3, "a",
                            ifelse(a1 <= x2 & b1 <= x4, "b", "c")))
    
    train_data$cat = as.factor(train_data$cat)
    
    #new splits
    a_table = train_data %>%
        filter(cat == "a") %>%
        select(a1, b1, c1, cat)
    
    b_table = train_data %>%
        filter(cat == "b") %>%
        select(a1, b1, c1, cat)
    
    c_table = train_data %>%
        filter(cat == "c") %>%
        select(a1, b1, c1, cat)
    
    
    
    #calculate  quantile ("quant") for each bin
    
    table_a = data.frame(a_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > x[5],1,0 )))
    
    table_b = data.frame(b_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > x[6],1,0 )))
    
    table_c = data.frame(c_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > x[7],1,0 )))
    
    f[1] = mean(table_a$quant)
    f[2] = mean(table_b$quant)
    f[3] = mean(table_c$quant)
    
    
    #group all tables
    
    final_table = rbind(table_a, table_b, table_c)
    # calculate the total mean : this is what needs to be optimized
    
    f[4] = mean(final_table$quant)
    
    
    return (f);
}

gn <- function(x) {
    g1 <- x[3] - x[1] 
    g2<- x[4] - x[2] 
    g3 <- x[7] - x[6]
    g4 <- x[6] - x[5] 
    return(c(g1,g2,g3,g4))
}

optimization <- nsga2(funct_set, idim = 7, odim = 4 , constraints = gn, cdim = 4,
                      
                      generations=150,
                      popsize=100,
                      cprob=0.7,
                      cdist=20,
                      mprob=0.2,
                      mdist=20,
                      lower.bounds=rep(80,80,80,80, 100,200,300),
                      upper.bounds=rep(120,120,120,120,200,300,400)
)

Now, if we take a look at the output:

#view output
optimization

Sample Image

All the logical conditions (i.e. the "constraints") are now respected!

Note: if possible, I would still be interested in seeing alternate ways to solve this problem

Thanks everyone!

How to write more than one logical condition inside filter

You can put your conditionals in the filter function. You're almost there in your example :-)

########
# Setup
########
set.seed(327) # Setting a seed makes the example reproducible

ID <- seq(1:50)
mou <-
  sample(c(2000, 2500, 440, 4990, 23000, 450, 3412, 4958, 745, 1000),
         50,
         replace = TRUE)
calls <-
  sample(c(50, 51, 12, 60, 90, 16, 89, 59, 33, 23, 50, 555), 50, replace = TRUE)
rev <-
  sample(c(100, 345, 758, 44, 58, 334, 888, 205, 940, 298, 754), 50, replace = TRUE)

dt <- data.frame(mou, calls, rev)

library(tidyverse)

########
# Here's the direct answer to your question
########
dt %>%
  filter(calls > 34 & calls < 200) %>% 
  filter(rev > 100 & rev < 400) %>% # Using two filters makes things more readable
  summarise(mean_mou = mean(mou))

# 3349

Variable logical operators to filter data

You can define a new operator to use instead of & and |. But it can’t be MYCHOICE — custom operators in R need to surrounded by %…%. You could go for %MYCHOICE% but I suggest something a little less verbose, e.g.:

`%.%` <- `&`

mtcars %>% filter(mpg > 3 & (cyl == 6) %.% (gear == 4) %.% (hp > 110))

(Note the parentheses around individual terms to ensure correct operator precedence.)

You can set %.% to whatever operator you want. If you want to set it based on a string, you could use e.g. switch:

choice <- 'And'

`%.%` <- switch(choice, And = `&`, Or = `|`)

Using multiple criteria in subset function and logical operators

The correct operator is %in% here. Here is an example with dummy data:

set.seed(1)
dat <- data.frame(bf11 = sample(4, 10, replace = TRUE),
                  foo = runif(10))

giving:

> head(dat)
  bf11       foo
1    2 0.2059746
2    2 0.1765568
3    3 0.6870228
4    4 0.3841037
5    1 0.7698414
6    4 0.4976992

The subset of dat where bf11 equals any of the set 1,2,3 is taken as follows using %in%:

> subset(dat, subset = bf11 %in% c(1,2,3))
   bf11       foo
1     2 0.2059746
2     2 0.1765568
3     3 0.6870228
5     1 0.7698414
8     3 0.9919061
9     3 0.3800352
10    1 0.7774452

As to why your original didn't work, break it down to see the problem. Look at what 1||2||3 evaluates to:

> 1 || 2 || 3
[1] TRUE

and you'd get the same using | instead. As a result, the subset() call would only return rows where bf11 was TRUE (or something that evaluated to TRUE).

What you could have written would have been something like:

subset(dat, subset = bf11 == 1 | bf11 == 2 | bf11 == 3)

Which gives the same result as my earlier subset() call. The point is that you need a series of single comparisons, not a comparison of a series of options. But as you can see, %in% is far more useful and less verbose in such circumstances. Notice also that I have to use | as I want to compare each element of bf11 against 1, 2, and 3, in turn. Compare:

> with(dat, bf11 == 1 || bf11 == 2)
[1] TRUE
> with(dat, bf11 == 1 | bf11 == 2)
 [1]  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE

Is there a way to pass arguments with logical operators (!=, , ) to a function?

Certainly there is a way: operators are regular functions in R, you can pass them around like any other function.

The only complication is that the operators are non-syntactic names so you can’t just pass them “as is”, this would confuse the parser. Instead, you need to wrap them in backticks, to make their use syntactically valid where a name would be expected:

filter_something = function (value, op) {
    op(value, 13)
}

filter_something(cars$speed, `>`)
filter_something(cars$speed, `<`)
filter_something(cars$speed, `==`)

And since R also supports non-standard evaluation of function arguments, you can also pass unevaluated expressions — this gets slightly more complicated, since you’d want to evaluate them in the correct context. ‘rlang’/‘dplyr’ uses data masking for this.

How exactly you need to apply this depends entirely on the context in which the expression is to be used. In many cases, you can simply dispatch them to the corresponding ‘dplyr’ functions, e.g.

filter_something2 = function (.data, expr) {
    .data %>%
        filter({{expr}})
}

filter_something2(cars, speed < 13)

The “secret sauce” here is the {{…}} syntax. This works because filter from ‘dplyr’ accepts unevaluated arguments and handles {{expr}} specially by transforming it into (effectively) !! enexpr(expr). That is: expr is first “defused”: it is explicitly marked as unevaluated, and the name expr is replaced by the unevaluated expression it binds to (speed < 13 in the above). Next, this unevaluated expression is unquoted. That is, the wrapper is “peeled off” from the expression, and that unevaluated expression itself is handled inside filter as if it were passed as filter(.data, speed < 13). In other words: the name expr is substituted with the speed < 13 in the call expression.

For a more thorough explanation, please refer to the Programming with dplyr vignette.

Subset dataframe by multiple logical conditions of rows to remove

The ! should be around the outside of the statement:

data[!(data$v1 %in% c("b", "d", "e")), ]

  v1 v2 v3 v4
1  a  v  d  c
2  a  v  d  d
5  c  k  d  c
6  c  r  p  g

How to perform logical operators through indexing in data.table at R?

From ?setkey, key(dt) get the key columns in a character vector. Assuming your table has a single key column, then you can get what you want with:

dt[dt[[key(dt)]] < 10]

Thanks to David Arenburg, you can also use get():

dt[get(key(dt)) < 10]

This is a little bit shorter and probably the way to go.

The other way I can think to do it is much worse:

dt[eval(parse(text = paste(key(dt), "< 10")))]

Logic operations with string/character data in R

Here's my first pass at it. There's room for improvement here, but maybe it will set you on the right direction. This question seems well suited for dplyr, and I'd be very interested in seeing answer using that package. Here's how you could do this in base:

The basic idea is to use pattern matching with grepl() to detect correct answers. That's straight forward because we can use the apply() family to compare each row of your response dataset to your dataset of correct answers.

The way I see it, the challenging bit is to score responses only within a group of answers. I do that here with more pattern matching to find unique groups, then calculating a sum of scores within each group in a for-loop.

Are these the results you were expecting?

# define dataset of correct answers only
ans <- data[1,]
# define dataset of responses only
data <- data[-1,]

# Get column indices for each group of answers
vars <- unique(gsub(pattern = ".?\\d+", replacement = "", names(data)))
vars
#>  [1] "PRE_TR"        "PRE_RULE_LLN"  "PRE_IS"        "PRE_RULE_CLT" 
#>  [5] "PT"            "POST_IS"       "POST_TR"       "POST_RULE_CLT"
#>  [9] "POST_RULE_LLN" "Ex"            "TIME"
colgroups <- sapply(1:length(vars), FUN = function(x) grep(vars[x], names(data)))

# Detect correct answers
scores <- sapply(1:ncol(data), FUN = function(x) ifelse(grepl(pattern = ans[1,x], x = data[,x]),1,0))
scores
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#> [1,]    1    1    1    1    1    0    1    1    1     1     0     1    NA    NA
#> [2,]    1    1    0    1    1    0    1    0    1     0     0     1    NA    NA
#>      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
#> [1,]    NA    NA    NA    NA    NA    NA    NA    NA     1     1     1     1
#> [2,]    NA    NA    NA    NA    NA    NA    NA    NA     0     0     0     0
#>      [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
#> [1,]     1     1     0     1     1     1     1     1     1     1    NA
#> [2,]     1     1     0     1     0     1     1     1     1     1    NA

# Make output dataframe
output <- data.frame(matrix(ncol = length(vars), nrow = nrow(data)))
names(output) <- vars
output
#>   PRE_TR PRE_RULE_LLN PRE_IS PRE_RULE_CLT PT POST_IS POST_TR POST_RULE_CLT
#> 1     NA           NA     NA           NA NA      NA      NA            NA
#> 2     NA           NA     NA           NA NA      NA      NA            NA
#>   POST_RULE_LLN Ex TIME
#> 1            NA NA   NA
#> 2            NA NA   NA

# Sum correct answers one group at a time
scores <- data.frame(scores)
for(i in 1:length(colgroups)){
  # We only need to sum if a group has more than one response
  # Multiple respones per group
  if(length(colgroups[[i]]) > 1){
    output[,vars[i]] <- rowSums(scores[,colgroups[[i]]])
  } else {
    # One response per group
    output[,vars[i]] <- scores[,colgroups[[i]]]
  }
}

output
#>   PRE_TR PRE_RULE_LLN PRE_IS PRE_RULE_CLT PT POST_IS POST_TR POST_RULE_CLT
#> 1      4            1      4            1 NA       5       4             1
#> 2      3            1      2            1 NA       1       3             1
#>   POST_RULE_LLN Ex TIME
#> 1             1  2   NA
#> 2             1  2   NA
Created on 2021-10-08 by the reprex package (v2.0.1)

Correctly Specifying "Logical Conditions" (In R)