Create new variable using a lookup table
If we do a left_join between the first dataset and the lookup dataset based on 'sex' and 'age', we get two 'length' columns (length.x from the first dataset and length.y from the lookup). Compare those two columns and create a new column with ifelse or case_when
library(dplyr)
# Join the reference lengths onto df1 by sex and age, then flag each row:
# "Low" when the observed length (length.x) does not exceed the reference
# length (length.y), otherwise "Normal".
df1 %>%
  left_join(lookup, by = c("sex", "age")) %>%
  mutate(
    growth.rate = case_when(
      length.x <= length.y ~ "Low",
      TRUE ~ "Normal"
    )
  ) %>%
  select(id, sex, age, growth.rate, length = length.x)
# id sex age growth.rate length
#1 1 Female 1 Low 45
#2 2 Female 2 Normal 54
#3 3 Female 3 Low 56
#4 4 Female 4 Normal 60
#5 5 Female 5 Low 60
#6 6 Female 6 Low 61
#7 7 Female 7 Low 63
#8 8 Male 1 Normal 55
#9 9 Male 2 Low 54
#10 10 Male 3 Normal 58
#11 11 Male 4 Normal 61
#12 12 Male 5 Normal 65
#13 13 Male 6 Low 63
#14 14 Male 7 Low 65
#15 15 Male 8 Normal 67
#16 16 Male 9 Normal 68
#17 17 Male 10 Low 69
In data.table, this can be made more compact
library(data.table)
# Update join: for every df1 row that matches a lookup row on sex and age,
# set growth.rate by reference. Inside the join, `length` is df1's column and
# `i.length` is the lookup's column.
# fcase() takes condition/value pairs, so the fallback value must be passed
# through the named `default` argument rather than as a trailing bare value.
setDT(df1)[
  lookup,
  growth.rate := fcase(length <= i.length, "Low", default = "Normal"),
  on = .(sex, age)
]
Or with an index
# Same update join, using the comparison result as an index into the labels:
# FALSE + 1 selects "Normal" (element 1), TRUE + 1 selects "Low" (element 2).
setDT(df1)[
  lookup,
  growth.rate := c("Normal", "Low")[(length <= i.length) + 1L],
  on = c("sex", "age")
]
data
# Example observations: 7 female and 10 male records with an integer age and
# an observed integer length.
df1 <- data.frame(
  id = 1:17,
  sex = rep(c("Female", "Male"), times = c(7L, 10L)),
  age = c(1:7, 1:10),
  length = c(
    45L, 54L, 56L, 60L, 60L, 61L, 63L,
    55L, 54L, 58L, 61L, 65L, 63L, 65L, 67L, 68L, 69L
  ),
  stringsAsFactors = FALSE
)
# Reference table: one reference length per sex/age combination.
lookup <- data.frame(
  sex = rep(c("Female", "Male"), times = c(7L, 10L)),
  age = c(1:7, 1:10),
  length = c(
    50L, 53L, 56L, 58L, 60L, 61L, 63L,
    50L, 54L, 57L, 60L, 62L, 63L, 65L, 66L, 67L, 69L
  ),
  stringsAsFactors = FALSE
)
Creating a new variable from a lookup table
define your lookup table
# Lookup table pairing each numeric `base` value (0-4) with its `aresult`
# label.
lookup <- data.frame(
  base = as.numeric(0:4),
  aresult = c("strikeout", "single", "double", "triple", "home run")
)
then use join from plyr
# Attach the lookup columns to `dataset`, matching rows on `aresult`.
# The call is namespace-qualified so it works without attaching plyr first.
dataset <- plyr::join(dataset, lookup, by = "aresult")
Create new variable based on the Look up table
To get to what you want you need to organise the table and categorise the data. I have provided a potential workflow to handle such situations. Hope this is helpful:
library(tidyverse)
# Example input: five records, each with an integer Amount and an integer Term.
df1 <- data.frame(
  Amount = as.integer(c(2500, 3600, 7000, 12000, 16000)),
  Term = as.integer(c(23, 30, 45, 50, 38))
)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# functions for analysis ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#' Map amounts to their amount-tier label
#'
#' Each tier is named after its inclusive upper bound: values up to 5000 are
#' "Tier_5000", values in (5000, 10000] are "Tier_10000", and so on; anything
#' above 20000 is "Tier_25000".
#'
#' @param x A numeric vector of amounts.
#' @return A character vector the same length as `x`. Missing inputs yield
#'   `NA` instead of being silently placed in the top tier.
amount_tier_function <- function(x) {
  upper_bounds <- c(5000, 10000, 15000, 20000)
  tier_labels <- c(
    "Tier_5000", "Tier_10000", "Tier_15000", "Tier_20000", "Tier_25000"
  )
  # left.open = TRUE makes every interval (lower, upper], so a value equal to
  # a bound stays in that bound's tier; + 1L shifts the 0-based interval
  # number to a 1-based label index.
  tier_labels[findInterval(x, upper_bounds, left.open = TRUE) + 1L]
}
#' Map terms to their month-tier label
#'
#' Each tier is named after its inclusive upper bound: values up to 24 are
#' "Tier_24", values in (24, 36] are "Tier_36", values in (36, 48] are
#' "Tier_48", and anything above 48 is "Tier_60".
#'
#' @param x A numeric vector of terms.
#' @return A character vector the same length as `x`. Missing inputs yield
#'   `NA` instead of being silently placed in the top tier.
month_tier_function <- function(x) {
  upper_bounds <- c(24, 36, 48)
  tier_labels <- c("Tier_24", "Tier_36", "Tier_48", "Tier_60")
  # left.open = TRUE makes every interval (lower, upper]; + 1L converts the
  # 0-based interval number to a 1-based label index.
  tier_labels[findInterval(x, upper_bounds, left.open = TRUE) + 1L]
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Recut lookup table headings ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Wide lookup table: one row per amount tier, one integer column per month
# tier.
lookup_df <- data.frame(
  amount_tier = paste0("Tier_", c(5000L, 10000L, 15000L, 20000L, 25000L)),
  Tier_24 = c(133L, 191L, 229L, 600L, 635L),
  Tier_36 = c(163L, 213L, 252L, 615L, 645L),
  Tier_48 = c(175L, 229L, 275L, 625L, 675L),
  Tier_60 = c(186L, 249L, 306L, 719L, 786L),
  stringsAsFactors = FALSE
)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Join everything together ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Reshape the wide lookup table to long form: one row per
# (amount_tier, mth_tier) pair, with the cell value in `Premium`.
# pivot_longer() replaces the superseded gather().
lookup_df_tidy <- lookup_df %>%
  pivot_longer(
    cols = -amount_tier,
    names_to = "mth_tier",
    values_to = "Premium"
  )

# Classify every record into its two tiers, pull the matching Premium from the
# long lookup table, then drop the helper tier columns. The join keys are
# spelled out so the join neither guesses them nor prints a message.
df1 %>%
  mutate(
    amount_tier = amount_tier_function(Amount),
    mth_tier = month_tier_function(Term)
  ) %>%
  left_join(lookup_df_tidy, by = c("amount_tier", "mth_tier")) %>%
  select(-amount_tier, -mth_tier)
Create SAS variable based on values in look-up table
A lookup table for data value mapping is essentially a left join operation. SAS has a lot of ways to left join data, including
- SQL
- Merge
- Hash object
- Array (direct addressing)
- Formats
- Informats
Here are four ways: SQL, Merge, Array and Hash. The mapping from var* to category is done by the functional mapping int(value/10):
/* Sample input: one row per ID with two raw values, varx and vary. */
data have;
input ID $ varx vary; /* ID is read as character ($); varx and vary as numeric */
datalines;
1 1 1
2 4 5
3 11 12
4 23 14
5 24 20
6 5 29 /* score should be 107 */
;
/* Lookup table in long form: one output row per (index_y, index_x) pair.
   Each data line supplies the three index_x values for one index_y. */
data lookup;
do index_y = 0 to 2;
do index_x = 0 to 2;
/* trailing @@ holds the current data line so successive INPUT calls keep
   reading values from it */
input lookup_value @@;
output;
end;
end;
datalines;
21 52 73
84 95 96
107 118 149
;
*------------------- SQL;
/* Left join HAVE to LOOKUP on the derived categories int(varx/10) and
   int(vary/10), so every HAVE row is kept and given its lookup value as
   score. */
proc sql;
  create table want as
  select
    h.id, l.lookup_value as score
  from
    have as h
  left join
    lookup as l
  on
    int(h.varx/10) = l.index_x
  and
    int(h.vary/10) = l.index_y
  order by
    h.id
  ;
quit; /* end PROC SQL explicitly instead of leaving it active until the next step */
*------------------- MERGE;
/* Step 1: derive the category keys xcat/ycat from varx/vary and create a
   composite index named myindexname on them. */
data have2(index=(myindexname=(xcat ycat)));
set have;
xcat = int(varx/10);
ycat = int(vary/10);
run;
/* Step 2: order LOOKUP by the BY variables used in the merge below. */
proc sort data=lookup;
by index_x index_y;
/* NOTE(review): msglevel=i presumably requests informational log notes
   (e.g. about index usage) - confirm */
options msglevel=i;
/* Step 3: match-merge on the category keys. HAVE2's keys are renamed to the
   LOOKUP key names; "if left" keeps only rows that came from HAVE2, and the
   matched lookup_value is output as score.
   NOTE(review): HAVE2 is not sorted here; presumably the index satisfies the
   BY-order requirement - confirm */
data want2(keep=id lookup_value rename=(lookup_value=score));
merge
have2(rename=(xcat=index_x ycat=index_y) in=left)
lookup
;
by index_x index_y;
if left;
run;
/* Step 4: restore ID order. */
proc sort data=want2;
by id;
run;
*------------------- ARRAY DIRECT ADDRESSING;
/* Load LOOKUP once into a temporary 3x3 array indexed [index_x, index_y],
   then score each HAVE row by addressing the array directly. */
data want3;
  array lookup [0:2,0:2] _temporary_;

  /* first iteration only: read every LOOKUP row into the array */
  if _n_ = 1 then do until (endlookup);
    set lookup end=endlookup;
    lookup[index_x,index_y] = lookup_value;
  end;

  set have;

  /* int() makes the category mapping explicit and consistent with the SQL,
     MERGE and HASH variants, instead of relying on how a fractional
     subscript happens to be handled */
  xcat = int(varx/10);
  ycat = int(vary/10);
  score = lookup[xcat,ycat];

  keep id score;
run;
*------------------- HASH LOOKUP;
/* Score each HAVE row by looking up (index_x, index_y) in a hash object that
   is loaded from the LOOKUP data set. */
data want4;
/* never executes at run time; its presence at compile time defines LOOKUP's
   variables (index_x, index_y, lookup_value) in this step */
if 0 then set lookup;
/* build and load the hash once, on the first iteration */
if _n_ = 1 then do;
declare hash lookup(dataset:'lookup');
lookup.defineKey('index_x', 'index_y');
lookup.defineData('lookup_value');
lookup.defineDone();
end;
set have;
/* derive the key values for the current row */
index_x = int(varx/10);
index_y = int(vary/10);
/* score is assigned only when find() returns 0, i.e. the key was found */
if (lookup.find() = 0) then
score = lookup_value;
keep id score;
run;
Create new variable in R data frame by conditional lookup
Here's how I would approach this using the data.table package
library(data.table)
# Convert df to a data.table by reference.
setDT(df)

# Group rows by the size word found in `pet` ("medium" or "large"; names that
# contain neither are left unchanged and so form their own groups) and
# reverse numPets within each group.
df[
  ,
  numEnemies := rev(numPets),
  by = .(size = sub(".*(large|medium).*", "\\1", pet))
]

# Pets whose name starts with "small" get 0 enemies.
df[grepl("^small", pet), numEnemies := 0L]
# pet numPets numEnemies
# 1: smalldog 1 0
# 2: mediumdog 2 5
# 3: largedog 3 6
# 4: smallcat 4 0
# 5: mediumcat 5 2
# 6: largecat 6 3
What I basically did is first create groups of medium and large over the whole data set, and then reverse the values of numPets within each group to get numEnemies.
Then, I've assigned 0 to numEnemies for all the rows where grep("^small", pet) matches.
This should be both very efficient and robust, as it will work on any number of animals and you don't actually need to know the animal names a priori.
Assigning a large number of variables (from a lookup table?)
# Select the rows of lookuptab whose Name equals lookupvalue and return the
# value(s) in the second column.
lookuptab[lookuptab$Name==lookupvalue,2]
Or, as a function:
#' Look up a value by name in `lookuptab`
#'
#' @param lookupvalue The name to search for in `lookuptab$Name`.
#' @return The second-column entries of every `lookuptab` row whose `Name`
#'   equals `lookupvalue`.
lookup <- function(lookupvalue) {
  is_match <- lookuptab$Name == lookupvalue
  lookuptab[is_match, 2]
}
# Example calls: fetch the entries named "ArcBo" and "GadMa".
lookup("ArcBo")
lookup("GadMa")
How to create a calculated column using a lookup table and a formula in R?
Edit Updated answer to updated question (data frame has 1,125 columns):
# Weighted sum of all columns: multiply the data (as a matrix) by the
# coefficient vector, then divide by the first weight.
df1_V <- as.matrix(df1) # or select the "V" columns using df1[, 1:1125]
# %*% returns an n x 1 matrix; drop() turns it into a plain numeric vector so
# new_column is an ordinary column rather than a matrix column.
df1$new_column <- drop(df1_V %*% df2$coef / df2$weight[1])
This is a general solution which will work for any number of columns as long as the columns of df1 are arranged in the same manner (across the columns of the data frame) as the coef values are ordered (row-wise) in df2, and the number of columns in df1 equals the number of rows in df2, that is ncol(df1_V) = nrow(df2)
.
Answer (to original question):
library(dplyr)
# Weighted sum of V1-V3 with hard-coded coefficients, divided by 4.77.
df %>%
  mutate(
    new_column = (0.82 * V1 + 0.75 * V2 + 0.67 * V3) / 4.77
  )
V1 V2 V3 new_column
1 3 4 3 1.5660377
2 2 4 3 1.3941300
3 4 4 3 1.7379455
4 4 4 4 1.8784067
5 1 4 2 1.0817610
6 4 2 4 1.5639413
...
Alternative:
# Same weighted sum via matrix multiplication; drop() converts the n x 1
# matrix result into a plain numeric vector before it is stored as a column.
df1$new_column <- drop(as.matrix(df1) %*% c(0.82, 0.75, 0.67) / 4.77)
How to use lookup table to label columns in R with clean variable names?
One option is to pass your clean variable name to labs, as shown below. A drawback of this approach is that you have to specify both the name of the scale or guide you want to label and the name of the variable:
library(ggplot2)
# Bar chart of var1; the x-axis title is taken from the clean-name lookup.
ggplot(df, aes(x = var1)) +
  geom_bar() +
  labs(x = vars_clean[["var1"]])
A second approach to overcome these drawbacks would be to make use of ggeasy::easy_labs
which builds on the labelled
package. Here the labels are added as attributes to the dataset.
library(ggeasy)
library(labelled)
# Store the clean names as variable labels on df.
labelled::var_label(df) <- vars_clean

# Bar chart of var1; easy_labs() supplies the axis titles.
ggplot(df, aes(x = var1)) +
  geom_bar() +
  easy_labs()
Related Topics
How to Make R Beep/Play a Sound at the End of a Script
Animated Sorted Bar Chart with Bars Overtaking Each Other
Insert Picture/Table in R Markdown
Apply a Function to Every Row of a Matrix or a Data Frame
How to Specify the Actual X Axis Values to Plot as X Axis Ticks in R
How to Add a Ggplot2 Subtitle with Different Size and Colour
Is There a Better Alternative Than String Manipulation to Programmatically Build Formulas
How to Convert R Markdown to HTML? I.E., What Does "Knit HTML" Do in Rstudio 0.96
How to Connect R with Access Database in 64-Bit Window
How to Calculate Combination and Permutation in R
How to Avoid Warning When Introducing Nas by Coercion
Function to Calculate Geospatial Distance Between Two Points (Lat,Long) Using R
How to Replace Nan Value with Zero in a Huge Data Frame
Make Conditionalpanel Depend on Files Uploaded with Fileinput