Correctly Specifying Logical Conditions (in R)
UPDATE:
I think I was able to resolve this problem - now the "logical conditions" are respected in the final output:
#load libraries
library(dplyr)
library(mco)
#define function
funct_set <- function (x) {
x1 <- x[1]; x2 <- x[2]; x3 <- x[3] ; x4 <- x[4]; x5 <- x[5]; x6 <- x[6]; x[7] <- x[7]
f <- numeric(4)
#bin data according to random criteria
train_data <- train_data %>%
mutate(cat = ifelse(a1 <= x1 & b1 <= x3, "a",
ifelse(a1 <= x2 & b1 <= x4, "b", "c")))
train_data$cat = as.factor(train_data$cat)
#new splits
a_table = train_data %>%
filter(cat == "a") %>%
select(a1, b1, c1, cat)
b_table = train_data %>%
filter(cat == "b") %>%
select(a1, b1, c1, cat)
c_table = train_data %>%
filter(cat == "c") %>%
select(a1, b1, c1, cat)
#calculate quantile ("quant") for each bin
table_a = data.frame(a_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > x[5],1,0 )))
table_b = data.frame(b_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > x[6],1,0 )))
table_c = data.frame(c_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > x[7],1,0 )))
f[1] = mean(table_a$quant)
f[2] = mean(table_b$quant)
f[3] = mean(table_c$quant)
#group all tables
final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized
f[4] = mean(final_table$quant)
return (f);
}
gn <- function(x) {
g1 <- x[3] - x[1]
g2<- x[4] - x[2]
g3 <- x[7] - x[6]
g4 <- x[6] - x[5]
return(c(g1,g2,g3,g4))
}
optimization <- nsga2(funct_set, idim = 7, odim = 4 , constraints = gn, cdim = 4,
generations=150,
popsize=100,
cprob=0.7,
cdist=20,
mprob=0.2,
mdist=20,
lower.bounds=rep(80,80,80,80, 100,200,300),
upper.bounds=rep(120,120,120,120,200,300,400)
)
Now, if we take a look at the output:
#view output
optimization
All the logical conditions (i.e. the "constraints") are now respected!
Note: if possible, I would still be interested in seeing alternate ways to solve this problem
Thanks everyone!
How to write more than one logical condition inside filter
You can put your conditionals in the filter
function. You're almost there in your example :-)
########
# Setup
########
set.seed(327) # Setting a seed makes the example reproducible
ID <- seq(1:50)
mou <-
sample(c(2000, 2500, 440, 4990, 23000, 450, 3412, 4958, 745, 1000),
50,
replace = TRUE)
calls <-
sample(c(50, 51, 12, 60, 90, 16, 89, 59, 33, 23, 50, 555), 50, replace = TRUE)
rev <-
sample(c(100, 345, 758, 44, 58, 334, 888, 205, 940, 298, 754), 50, replace = TRUE)
dt <- data.frame(mou, calls, rev)
library(tidyverse)
########
# Here's the direct answer to your question
########
dt %>%
filter(calls > 34 & calls < 200) %>%
filter(rev > 100 & rev < 400) %>% # Using two filters makes things more readable
summarise(mean_mou = mean(mou))
# 3349
Variable logical operators to filter data
You can define a new operator to use instead of &
and |
. But it can’t be MYCHOICE
— custom operators in R need to surrounded by %…%
. You could go for %MYCHOICE%
but I suggest something a little less verbose, e.g.:
`%.%` <- `&`
mtcars %>% filter(mpg > 3 & (cyl == 6) %.% (gear == 4) %.% (hp > 110))
(Note the parentheses around individual terms to ensure correct operator precedence.)
You can set %.%
to whatever operator you want. If you want to set it based on a string, you could use e.g. switch
:
choice <- 'And'
`%.%` <- switch(choice, And = `&`, Or = `|`)
Using multiple criteria in subset function and logical operators
The correct operator is %in%
here. Here is an example with dummy data:
set.seed(1)
dat <- data.frame(bf11 = sample(4, 10, replace = TRUE),
foo = runif(10))
giving:
> head(dat)
bf11 foo
1 2 0.2059746
2 2 0.1765568
3 3 0.6870228
4 4 0.3841037
5 1 0.7698414
6 4 0.4976992
The subset of dat
where bf11
equals any of the set 1,2,3
is taken as follows using %in%
:
> subset(dat, subset = bf11 %in% c(1,2,3))
bf11 foo
1 2 0.2059746
2 2 0.1765568
3 3 0.6870228
5 1 0.7698414
8 3 0.9919061
9 3 0.3800352
10 1 0.7774452
As to why your original didn't work, break it down to see the problem. Look at what 1||2||3
evaluates to:
> 1 || 2 || 3
[1] TRUE
and you'd get the same using |
instead. As a result, the subset()
call would only return rows where bf11
was TRUE
(or something that evaluated to TRUE
).
What you could have written would have been something like:
subset(dat, subset = bf11 == 1 | bf11 == 2 | bf11 == 3)
Which gives the same result as my earlier subset()
call. The point is that you need a series of single comparisons, not a comparison of a series of options. But as you can see, %in%
is far more useful and less verbose in such circumstances. Notice also that I have to use |
as I want to compare each element of bf11
against 1
, 2
, and 3
, in turn. Compare:
> with(dat, bf11 == 1 || bf11 == 2)
[1] TRUE
> with(dat, bf11 == 1 | bf11 == 2)
[1] TRUE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
Is there a way to pass arguments with logical operators (!=, , ) to a function?
Certainly there is a way: operators are regular functions in R, you can pass them around like any other function.
The only complication is that the operators are non-syntactic names so you can’t just pass them “as is”, this would confuse the parser. Instead, you need to wrap them in backticks, to make their use syntactically valid where a name would be expected:
filter_something = function (value, op) {
op(value, 13)
}
filter_something(cars$speed, `>`)
filter_something(cars$speed, `<`)
filter_something(cars$speed, `==`)
And since R also supports non-standard evaluation of function arguments, you can also pass unevaluated expressions — this gets slightly more complicated, since you’d want to evaluate them in the correct context. ‘rlang’/‘dplyr’ uses data masking for this.
How exactly you need to apply this depends entirely on the context in which the expression is to be used. In many cases, you can simply dispatch them to the corresponding ‘dplyr’ functions, e.g.
filter_something2 = function (.data, expr) {
.data %>%
filter({{expr}})
}
filter_something2(cars, speed < 13)
The “secret sauce” here is the {{…}}
syntax. This works because filter
from ‘dplyr’ accepts unevaluated arguments and handles {{expr}}
specially by transforming it into (effectively) !! enexpr(expr)
. That is: expr
is first “defused”: it is explicitly marked as unevaluated, and the name expr
is replaced by the unevaluated expression it binds to (speed < 13
in the above). Next, this unevaluated expression is unquoted. That is, the wrapper is “peeled off” from the expression, and that unevaluated expression itself is handled inside filter
as if it were passed as filter(.data, speed < 13)
. In other words: the name expr
is substituted with the speed < 13
in the call expression.
For a more thorough explanation, please refer to the Programming with dplyr vignette.
Subset dataframe by multiple logical conditions of rows to remove
The !
should be around the outside of the statement:
data[!(data$v1 %in% c("b", "d", "e")), ]
v1 v2 v3 v4
1 a v d c
2 a v d d
5 c k d c
6 c r p g
How to perform logical operators through indexing in data.table at R?
From ?setkey
, key(dt)
get the key columns in a character vector. Assuming your table has a single key column, then you can get what you want with:
dt[dt[[key(dt)]] < 10]
Thanks to David Arenburg, you can also use get()
:
dt[get(key(dt)) < 10]
This is a little bit shorter and probably the way to go.
The other way I can think to do it is much worse:
dt[eval(parse(text = paste(key(dt), "< 10")))]
Logic operations with string/character data in R
Here's my first pass at it. There's room for improvement here, but maybe it will set you on the right direction. This question seems well suited for dplyr
, and I'd be very interested in seeing answer using that package. Here's how you could do this in base:
The basic idea is to use pattern matching with grepl()
to detect correct answers. That's straight forward because we can use the apply()
family to compare each row of your response dataset to your dataset of correct answers.
The way I see it, the challenging bit is to score responses only within a group of answers. I do that here with more pattern matching to find unique groups, then calculating a sum of scores within each group in a for-loop.
Are these the results you were expecting?
# define dataset of correct answers only
ans <- data[1,]
# define dataset of responses only
data <- data[-1,]
# Get column indices for each group of answers
vars <- unique(gsub(pattern = ".?\\d+", replacement = "", names(data)))
vars
#> [1] "PRE_TR" "PRE_RULE_LLN" "PRE_IS" "PRE_RULE_CLT"
#> [5] "PT" "POST_IS" "POST_TR" "POST_RULE_CLT"
#> [9] "POST_RULE_LLN" "Ex" "TIME"
colgroups <- sapply(1:length(vars), FUN = function(x) grep(vars[x], names(data)))
# Detect correct answers
scores <- sapply(1:ncol(data), FUN = function(x) ifelse(grepl(pattern = ans[1,x], x = data[,x]),1,0))
scores
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#> [1,] 1 1 1 1 1 0 1 1 1 1 0 1 NA NA
#> [2,] 1 1 0 1 1 0 1 0 1 0 0 1 NA NA
#> [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
#> [1,] NA NA NA NA NA NA NA NA 1 1 1 1
#> [2,] NA NA NA NA NA NA NA NA 0 0 0 0
#> [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
#> [1,] 1 1 0 1 1 1 1 1 1 1 NA
#> [2,] 1 1 0 1 0 1 1 1 1 1 NA
# Make output dataframe
output <- data.frame(matrix(ncol = length(vars), nrow = nrow(data)))
names(output) <- vars
output
#> PRE_TR PRE_RULE_LLN PRE_IS PRE_RULE_CLT PT POST_IS POST_TR POST_RULE_CLT
#> 1 NA NA NA NA NA NA NA NA
#> 2 NA NA NA NA NA NA NA NA
#> POST_RULE_LLN Ex TIME
#> 1 NA NA NA
#> 2 NA NA NA
# Sum correct answers one group at a time
scores <- data.frame(scores)
for(i in 1:length(colgroups)){
# We only need to sum if a group has more than one response
# Multiple respones per group
if(length(colgroups[[i]]) > 1){
output[,vars[i]] <- rowSums(scores[,colgroups[[i]]])
} else {
# One response per group
output[,vars[i]] <- scores[,colgroups[[i]]]
}
}
output
#> PRE_TR PRE_RULE_LLN PRE_IS PRE_RULE_CLT PT POST_IS POST_TR POST_RULE_CLT
#> 1 4 1 4 1 NA 5 4 1
#> 2 3 1 2 1 NA 1 3 1
#> POST_RULE_LLN Ex TIME
#> 1 1 2 NA
#> 2 1 2 NA
Created on 2021-10-08 by the reprex package (v2.0.1)
Related Topics
Propagating Data Within a Vector
Reshaping Data Frame with Duplicates
How to Create a Bipartite Network in R with Igraph or Tnet
Get Connected Components Using Igraph in R
Installing Package - Cannot Open File - Permission Denied
What Evaluates to True/False in R
How to Remove Rows with 0 Values Using R
Remove Extra Space and Ring at the Edge of a Polar Plot
How to Add Rtools\Bin to the System Path in R
R: Extracting "Clean" Utf-8 Text from a Web Page Scraped with Rcurl
Round a Posix Date (Posixct) with Base R Functionality
Plot Data Over Background Image with Ggplot
Shinydashboard Some Font Awesome Icons Not Working
How to Insert a Dataframe into a SQL Server Table
R - How to Make a Click on Webpage Using Rvest or Rcurl
Create an R Package That Depends on Another R Package Located on Github