Creating a New Variable from a Lookup Table

Create new variable using a lookup table

If we do a left_join between the first dataset and the lookup dataset on 'sex' and 'age', we get two 'length' columns (length.x and length.y). Compare those columns and create a new column with ifelse or case_when

library(dplyr)
# Attach the reference length for each (sex, age) pair, then label a row "Low"
# when its own length (length.x) does not exceed the reference (length.y).
df1 %>%
  left_join(lookup, by = c("sex", "age")) %>%
  transmute(
    id, sex, age,
    growth.rate = case_when(
      length.x <= length.y ~ "Low",
      TRUE ~ "Normal"
    ),
    length = length.x
  )
# id sex age growth.rate length
#1 1 Female 1 Low 45
#2 2 Female 2 Normal 54
#3 3 Female 3 Low 56
#4 4 Female 4 Normal 60
#5 5 Female 5 Low 60
#6 6 Female 6 Low 61
#7 7 Female 7 Low 63
#8 8 Male 1 Normal 55
#9 9 Male 2 Low 54
#10 10 Male 3 Normal 58
#11 11 Male 4 Normal 61
#12 12 Male 5 Normal 65
#13 13 Male 6 Low 63
#14 14 Male 7 Low 65
#15 15 Male 8 Normal 67
#16 16 Male 9 Normal 68
#17 17 Male 10 Low 69

In data.table, this can be made more compact

library(data.table)
# Update join: for every row of df1 matched in lookup on (sex, age), compare
# df1's length with the lookup's length (i.length) and store the label in
# growth.rate by reference.
# fcase() takes condition/value pairs, so the fallback label has to be passed
# through the named `default` argument; a bare trailing value leaves an odd
# number of arguments and fcase() rejects the call.
setDT(df1)[lookup,
           growth.rate := fcase(length <= i.length, "Low",
                                default = "Normal"),
           on = .(sex, age)]

Or with an index

# Same update join, choosing the label by position: the comparison is
# FALSE/TRUE (0/1), so adding 1 picks "Normal" (1st) or "Low" (2nd).
setDT(df1)[
  lookup,
  growth.rate := c("Normal", "Low")[(length <= i.length) + 1],
  on = .(sex, age)
]

data

# Sample measurements: 7 "Female" rows (ages 1-7) followed by 10 "Male" rows
# (ages 1-10), each with an id and an observed length.
df1 <- data.frame(
  id = 1:17,
  sex = rep(c("Female", "Male"), times = c(7L, 10L)),
  age = c(1:7, 1:10),
  length = c(
    45L, 54L, 56L, 60L, 60L, 61L, 63L,
    55L, 54L, 58L, 61L, 65L, 63L, 65L, 67L, 68L, 69L
  ),
  stringsAsFactors = FALSE
)

# Reference lengths per (sex, age): 7 "Female" rows (ages 1-7) followed by
# 10 "Male" rows (ages 1-10).
lookup <- data.frame(
  sex = rep(c("Female", "Male"), times = c(7L, 10L)),
  age = c(1:7, 1:10),
  length = c(
    50L, 53L, 56L, 58L, 60L, 61L, 63L,
    50L, 54L, 57L, 60L, 62L, 63L, 65L, 66L, 67L, 69L
  ),
  stringsAsFactors = FALSE
)

Creating a new variable from a lookup table

define your lookup table

# Lookup table pairing each at-bat result (aresult) with its base count.
lookup <- data.frame(
  base = c(0, 1, 2, 3, 4),
  aresult = c("strikeout", "single", "double", "triple", "home run")
)

then use join from plyr

# Left-join the lookup onto the dataset by at-bat result, which adds the
# matching `base` column. join() is called through its plyr:: namespace
# because this snippet never attaches plyr.
dataset <- plyr::join(dataset, lookup, by = "aresult")

Create new variable based on the Look up table

To get to what you want you need to organise the table and categorise the data. I have provided a potential workflow to handle such situations. Hope this is helpful:

library(tidyverse)

# Example input: one row per record, with an Amount and a Term.
df1 <- data.frame(
  Amount = c(2500L, 3600L, 7000L, 12000L, 16000L),
  Term   = c(23L, 30L, 45L, 50L, 38L)
)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# functions for analysis ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

# Assign each amount to its amount tier.
#
# An amount gets the first tier whose upper bound it does not exceed
# (5000, 10000, 15000, 20000); anything larger is "Tier_25000".
# Missing amounts yield NA rather than being labelled with the top tier.
#
# x: numeric vector of amounts.
# Returns a character vector of tier labels, the same length as x.
amount_tier_function <- function(x) {
  tier_breaks <- c(-Inf, 5000, 10000, 15000, 20000, Inf)
  tier_labels <- c(
    "Tier_5000", "Tier_10000", "Tier_15000", "Tier_20000", "Tier_25000"
  )
  # Right-closed intervals reproduce the `<=` comparisons; include.lowest
  # keeps -Inf in the first tier.
  tiers <- cut(
    x,
    breaks = tier_breaks,
    labels = tier_labels,
    right = TRUE,
    include.lowest = TRUE
  )
  as.character(tiers)
}


# Assign each term to its month tier.
#
# A term gets the first tier whose upper bound it does not exceed
# (24, 36, 48); anything larger is "Tier_60".
# Missing terms yield NA rather than being labelled with the top tier.
#
# x: numeric vector of terms.
# Returns a character vector of tier labels, the same length as x.
month_tier_function <- function(x) {
  tier_breaks <- c(-Inf, 24, 36, 48, Inf)
  tier_labels <- c("Tier_24", "Tier_36", "Tier_48", "Tier_60")
  # Right-closed intervals reproduce the `<=` comparisons; include.lowest
  # keeps -Inf in the first tier.
  tiers <- cut(
    x,
    breaks = tier_breaks,
    labels = tier_labels,
    right = TRUE,
    include.lowest = TRUE
  )
  as.character(tiers)
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Recut lookup table headings ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

# Wide lookup: one row per amount tier, one column per month tier.
lookup_df <- data.frame(
  amount_tier = c(
    "Tier_5000", "Tier_10000", "Tier_15000", "Tier_20000", "Tier_25000"
  ),
  Tier_24 = c(133L, 191L, 229L, 600L, 635L),
  Tier_36 = c(163L, 213L, 252L, 615L, 645L),
  Tier_48 = c(175L, 229L, 275L, 625L, 675L),
  Tier_60 = c(186L, 249L, 306L, 719L, 786L),
  stringsAsFactors = FALSE
)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Join everything together ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

# Reshape the wide lookup into one row per (amount_tier, mth_tier) pair with
# its Premium. pivot_longer() replaces the superseded gather(); its rows come
# out grouped by amount tier rather than by month tier, which does not affect
# the join below.
lookup_df_tidy <- lookup_df %>%
  pivot_longer(
    cols = -amount_tier,
    names_to = "mth_tier",
    values_to = "Premium"
  )

# Tier every record, attach its Premium, then drop the helper tier columns.
# The join keys are spelled out so the join does not depend on (or report)
# whichever column names the two tables happen to share.
df1 %>%
  mutate(
    amount_tier = amount_tier_function(Amount),
    mth_tier = month_tier_function(Term)
  ) %>%
  left_join(lookup_df_tidy, by = c("amount_tier", "mth_tier")) %>%
  select(-amount_tier, -mth_tier)

Create SAS variable based on values in look-up table

A lookup table for data value mapping is essentially a left join operation. SAS has a lot of ways to left join data, including

  • SQL
  • Merge
  • Hash object
  • Array (direct addressing)
  • Formats
  • Informats

Here are four ways: SQL, Merge, Array and Hash. The mapping from var* to category is done by the functional mapping int (value/10):

/* Sample input: reads ID as character and varx, vary as numeric
   from the in-stream data lines below. */
data have;
input ID $ varx vary;
datalines;
1 1 1
2 4 5
3 11 12
4 23 14
5 24 20
6 5 29 /* score should be 107 */
;

/* Lookup grid: outputs one row per (index_x, index_y) pair, 0 to 2 each,
   with lookup_value read from the in-stream data lines below.
   index_x is the inner loop, so consecutive values share an index_y.
   The trailing @@ holds the input line across iterations so that
   successive INPUT calls keep reading values from the same line. */
data lookup;
do index_y = 0 to 2;
do index_x = 0 to 2;
input lookup_value @@;
output;
end;
end;
datalines;
21 52 73
84 95 96
107 118 149
;

*------------------- SQL;

/* Left join every HAVE row to the lookup cell addressed by
   int(varx/10) and int(vary/10); the matched lookup_value is
   returned as score, ordered by id. */
proc sql;
  create table want as
  select
    id, lookup_value as score
  from
    have
  left join
    lookup
  on
    int (have.varx/10) = lookup.index_x
  and
    int (have.vary/10) = lookup.index_y
  order by
    id
  ;
/* End PROC SQL explicitly rather than leaving it active until the next step. */
quit;

*------------------- MERGE;

/* Derive the category keys xcat and ycat for each HAVE row and create
   HAVE2 with a composite index (myindexname) over them. */
data have2(index=(myindexname=(xcat ycat)));
set have;
xcat = int(varx/10);
ycat = int(vary/10);
run;
/* Sort the lookup by the merge keys. */
proc sort data=lookup;
by index_x index_y;

/* NOTE(review): msglevel=i presumably requests informational log notes
   (e.g. about index usage for the BY statement) -- confirm. */
options msglevel=i;

/* Match-merge HAVE2 (keys renamed to index_x / index_y) with the lookup
   and keep only rows contributed by HAVE2 (in=left).
   Only id and lookup_value are kept; lookup_value is output as score. */
data want2(keep=id lookup_value rename=(lookup_value=score));
merge
have2(rename=(xcat=index_x ycat=index_y) in=left)
lookup
;
by index_x index_y;
if left;
run;

/* Order the result by id. */
proc sort data=want2;
by id;
run;

*------------------- ARRAY DIRECT ADDRESSING;

/* Load the lookup data set into a temporary array indexed [0:2, 0:2] on the
   first iteration, then address it directly by each HAVE row's category. */
data want3;
  array lookup [0:2,0:2] _temporary_;
  if _n_ = 1 then do until (endlookup);
    set lookup end=endlookup;
    lookup[index_x,index_y] = lookup_value;
  end;

  set have;
  /* int() makes the category explicit, as in the SQL, MERGE and HASH
     variants, instead of passing fractional values as array subscripts. */
  xcat = int(varx/10);
  ycat = int(vary/10);
  /* Categories outside the grid leave score missing, like an unmatched
     row in the other variants, rather than hitting an out-of-range
     subscript. */
  if 0 <= xcat <= 2 and 0 <= ycat <= 2 then
    score = lookup[xcat,ycat];
  keep id score;
run;

*------------------- HASH LOOKUP;

/* Hash variant: load the lookup data set into a hash keyed on
   (index_x, index_y) and fetch lookup_value for each HAVE row. */
data want4;
  /* Never executes; only defines the lookup variables for the hash. */
  if 0 then set lookup;
  if _n_ = 1 then do;
    declare hash cells(dataset:'lookup');
    cells.defineKey('index_x', 'index_y');
    cells.defineData('lookup_value');
    cells.defineDone();
  end;

  set have;

  /* Key values for the current row. */
  index_x = int(varx/10);
  index_y = int(vary/10);

  /* find() returns 0 on a hit and fills lookup_value. */
  rc = cells.find();
  if rc = 0 then score = lookup_value;

  keep id score;
run;

Create new variable in R data frame by conditional lookup

Here's how I would approach this using the data.table packages

library(data.table)
# Group rows by the size word ("large" / "medium") found in pet and reverse
# numPets within each group; names containing neither word are left as their
# own groups.
setDT(df)[
  ,
  numEnemies := rev(numPets),
  by = sub(".*(large|medium).*", "\\1", pet)
]
# Rows whose pet name starts with "small" get 0.
df[grepl("^small", pet), numEnemies := 0L]
# pet numPets numEnemies
# 1: smalldog 1 0
# 2: mediumdog 2 5
# 3: largedog 3 6
# 4: smallcat 4 0
# 5: mediumcat 5 2
# 6: largecat 6 3

What I basically did, is to first create groups of medium and large over the whole data set and just reverse the values within each group.
Then I assigned 0 to numEnemies for every row whose pet name starts with "small", i.e. the rows selected by grep("^small", pet).

This should be both very efficient and robust, as it will work for any number of animals and you don't need to know the animal names a priori.

Assigning a large number of variables (from a lookup table?)

# Second column of the rows whose Name equals lookupvalue.
lookuptab[lookuptab$Name == lookupvalue, 2]

Or, as a function:

# Return the second column of `lookuptab` for the rows whose Name equals
# `lookupvalue`. `lookuptab` is read from the calling environment.
lookup <- function(lookupvalue) {
  is_match <- lookuptab$Name == lookupvalue
  lookuptab[is_match, 2]
}
lookup("ArcBo")
lookup("GadMa")

How to create a calculated column using a lookup table and a formula in R?

Edit: updated answer for the updated question (the data frame has 1,125 columns):

# Weighted combination of all columns: multiply the data (as a matrix) by the
# coefficient vector and divide by the first weight.
df1_V <- as.matrix(df1) # or select the "V" columns using df1[, 1:1125]
# %*% returns an n x 1 matrix; as.vector() stores it as a plain numeric column
# instead of a matrix column inside the data frame.
df1$new_column <- as.vector(df1_V %*% df2$coef / df2$weight[1])

This is a general solution which will work for any number of columns as long as the columns of df1 are arranged in the same manner (across the columns of the data frame) as the coef values are ordered (row-wise) in df2, and the number of columns in df1 equals the number of rows in df2, that is ncol(df1_V) = nrow(df2).


Answer (to original question):

library(dplyr)

# Weighted sum of V1, V2 and V3, divided by 4.77.
df %>%
  mutate(
    new_column = (V1 * 0.82 + V2 * 0.75 + V3 * 0.67) / 4.77
  )

   V1 V2 V3 new_column
1 3 4 3 1.5660377
2 2 4 3 1.3941300
3 4 4 3 1.7379455
4 4 4 4 1.8784067
5 1 4 2 1.0817610
6 4 2 4 1.5639413
...

Alternative:

# Same weighted sum via a matrix product. %*% returns an n x 1 matrix, so
# as.vector() is applied to store a plain numeric column rather than a matrix
# column, matching what the mutate() version produces.
df1$new_column <- as.vector(as.matrix(df1) %*% c(0.82, 0.75, 0.67) / 4.77)

How to use lookup table to label columns in R with clean variable names?

One option is to pass your clean variable name to labs, as shown below. A drawback of this approach is that you have to specify both the scale or guide you want to label and the name of the variable:

library(ggplot2)

# Bar chart of var1 whose x-axis title is looked up in vars_clean.
ggplot(df, aes(x = var1)) +
  geom_bar() +
  labs(x = vars_clean[["var1"]])

Sample Image

A second approach to overcome these drawbacks would be to make use of ggeasy::easy_labs which builds on the labelled package. Here the labels are added as attributes to the dataset.

library(ggeasy)
library(labelled)

# Store the clean names as variable labels on the data frame.
labelled::var_label(df) <- vars_clean

# Bar chart of var1; easy_labs() takes the axis titles from those labels.
ggplot(df, aes(x = var1)) +
  geom_bar() +
  easy_labs()

Sample Image



Related Topics



Leave a reply



Submit