Vlookup Type Method in R

How to do vlookup and fill down (like in Excel) in R?

If I understand your question correctly, here are four methods to do the equivalent of Excel's VLOOKUP and fill down using R:

# load sample data from Q
hous <- read.table(header = TRUE,
stringsAsFactors = FALSE,
text="HouseType HouseTypeNo
Semi 1
Single 2
Row 3
Single 2
Apartment 4
Apartment 4
Row 3")

# create a toy large table with a 'HouseType' column
# but no 'HouseTypeNo' column (yet)
largetable <- data.frame(HouseType = as.character(sample(unique(hous$HouseType), 1000, replace = TRUE)), stringsAsFactors = FALSE)

# create a lookup table to get the numbers to fill
# the large table
lookup <- unique(hous)
HouseType HouseTypeNo
1 Semi 1
2 Single 2
3 Row 3
5 Apartment 4

Here are four methods to fill the HouseTypeNo in the largetable using the values in the lookup table:

First with merge in base:

# 1. using base 
base1 <- (merge(lookup, largetable, by = 'HouseType'))

A second method with named vectors in base:

# 2. using base and a named vector
housenames <- as.numeric(1:length(unique(hous$HouseType)))
names(housenames) <- unique(hous$HouseType)

base2 <- data.frame(HouseType = largetable$HouseType,
HouseTypeNo = (housenames[largetable$HouseType]))

Third, using the plyr package:

# 3. using the plyr package
library(plyr)
plyr1 <- join(largetable, lookup, by = "HouseType")

Fourth, using the sqldf package

# 4. using the sqldf package
library(sqldf)
sqldf1 <- sqldf("SELECT largetable.HouseType, lookup.HouseTypeNo
FROM largetable
INNER JOIN lookup
ON largetable.HouseType = lookup.HouseType")

If it's possible that some house types in largetable do not exist in lookup then a left join would be used:

sqldf("select * from largetable left join lookup using (HouseType)")

Corresponding changes to the other solutions would be needed too.

Is that what you wanted to do? Let me know which method you like and I'll add commentary.

VLookup type method in R

merge them:

> merge(full_list, replace, by.x="Ticker", by.y="Symbol")
Ticker Long_Name Type Location Month
1 AC Ethanol -- CBOT F US U13
2 AIC DJ UBS Commodity Index -- CBOT F US U13
3 BBS South American Soybeans -- CBOT F US U13
4 BO Soybean Oil -- CBT F US V13
5 C Corn -- CBT F US U13
6 DF Dow Jones Industrial Average -- CBT F US U13

Replace/Vlookup type function in R

You could try match

df1[-1] <- `dim<-`(df2$amount[match(as.matrix(df1[-1]),
df2$letters)], c(4,3))

df1[is.na(df1)] <- '-'
#wouldn't recommend to replace `NA` as it will convert to `character` class
df1
# ID value_1 value_2 value_3
#1 1 5 - 6
#2 2 5 20 5
#3 3 20 5 20
#4 4 - - -

Or an option using mutate_each from dplyr

library(dplyr)
df1 %>%
mutate_each(funs(df2$amount[match(., df2$letters)]), -ID)
# ID value_1 value_2 value_3
#1 1 5 NA 6
#2 2 5 20 5
#3 3 20 5 20
#4 4 NA NA NA

Equivalent of a Vlookup in R

In dplyr, we can select only the rows we're interested in in df2, then filter it to contain only distinct rows before joining it (left or right doesn't matter here) to `df1.

library(dplyr)
df2 %>%
select(unique_id, amount_due) %>%
distinct() %>%
right_join(df1, by = 'unique_id')

unique_id amount_due df1 df2 df3 df4
1 1234 $100 1 h 8/4/18 no
2 2341 $1 2 nl 8/5/18 yes
3 3412 $200 3 sg 8/3/18 no
4 4213 $2.22 4 hi 7/3/18 yes

Using Merge in R to replicate Vlookup

A dplyr solution.

library(dplyr)
dbase <- data.frame(machine_number = c("10","20","30","10","10","50"),
second_attribute=c("a","b","c","c","a","d"))
mbase <- data.frame(machine_number = c("10","20","30","40","50","60","70","80","90","100"),
rate=c(22,22,25,17,15,15,55,12,15,19))

left_join(dbase, mbase, by = "machine_number")

machine_number second_attribute rate
1 10 a 22
2 20 b 22
3 30 c 25
4 10 c 22
5 10 a 22
6 50 d 15

Matching values in a column using R - excel vlookup

We can just use match to find the index of those matching elements of 'y' with that of 'x' and use that to index to get the corresponding 'z'

dt[, Result1 := z[match(y,x)]]
dt
# x y z Result Result1
#1: 1 6 a na NA
#2: 2 5 b e e
#3: 3 4 c d d
#4: 4 3 d c c
#5: 5 2 e b b
#6: 5 1 f a a

How to do Vlookup using R for range data

This is somewhat dependent on what format your data is in after being read from Excel, but if it is in the format implied by your question, then you can do:

analysis$source_cost <- source_data$COST[
findInterval(analysis$distance_travel,
sapply(strsplit(source_data$DISTANCE, '-'),
function(x) as.numeric(x[1])))]

analysis$Difference <- analysis$total_cost - analysis$source_cost

analysis
#> loading_station distance_travel total_cost status source_cost Difference
#> 1 PUGU 40 4000 PAID 4800 -800
#> 2 PUGU 80 3200 PAID 4800 -1600
#> 3 MOROGORO 50 5000 PAID 4800 200
#> 4 MOROGORO 220 30400 PAID 6900 23500
#> 5 DODOMA 150 5100 PAID 5900 -800
#> 6 KIGOMA 90 2345 PAID 4800 -2455
#> 7 DODOMA 230 6000 PAID 7200 -1200
#> 8 DODOMA 180 16500 PAID 6200 10300
#> 9 KIGOMA 32 3000 PAID 4800 -1800
#> 10 DODOMA 45 6000 PAID 4800 1200
#> 11 DODOMA 65 5000 PAID 4800 200
#> 12 KIGOMA 77 1000 PAID 4800 -3800
#> 13 KIGOMA 90 4000 PAID 4800 -800

Data from question in reproducible format

source_data <- structure(list(DISTANCE = c("1-100", "101-120", "121-140", 
"141-160", "161-180", "181-200", "210-220", "221-240"), COST = c(4800L,
5100L, 5500L, 5900L, 6200L, 6600L, 6900L, 7200L)),
class = "data.frame", row.names = c(NA, -8L))

analysis <- structure(list(loading_station = c("PUGU", "PUGU", "MOROGORO",
"MOROGORO", "DODOMA", "KIGOMA", "DODOMA", "DODOMA", "KIGOMA",
"DODOMA", "DODOMA", "KIGOMA", "KIGOMA"), distance_travel = c(40L,
80L, 50L, 220L, 150L, 90L, 230L, 180L, 32L, 45L, 65L, 77L, 90L
), total_cost = c(4000L, 3200L, 5000L, 30400L, 5100L, 2345L,
6000L, 16500L, 3000L, 6000L, 5000L, 1000L, 4000L), status = c("PAID",
"PAID", "PAID", "PAID", "PAID", "PAID", "PAID", "PAID", "PAID",
"PAID", "PAID", "PAID", "PAID")), class = "data.frame", row.names = c(NA,
-13L))

Created on 2022-08-14 by the reprex package (v2.0.1)

Excel vlookup multiple values and add duplicate rows

As @r-schifini mentioned, there are several libraries you can use to import Excel files. Here I use the readxl package. To retain all rows from the first spreadsheet -- your Spreadsheet1 -- specify all.x=TRUE in the merge function. See ?merge for more details. Note that I've added one more row in Spreadsheet1 with fake data for type B700.

library(readxl)
ss1 <- read_excel(path = "spreadsheet1.xlsx", sheet = 1)
ss2 <- read_excel(path = "spreadsheet2.xlsx", sheet = 1)

out <- merge(ss1, ss2, all.x=TRUE)
out
# Type Category NumItem TypeElement NumEngine
# 1 A380 Air 5 380T1 10
# 2 B700 Air 8 <NA> NA
# 3 B747 Air 10 747T1 2
# 4 B747 Air 10 747T2 4
# 5 B747 Air 10 747T3 8
# 6 B777 Air 20 777T1 6
# 7 B777 Air 20 777T2 4
# 8 TBus1 Ground 15 <NA> NA

Why do we have NAs in row 8? It's because your type is TBus1 in Spreadsheet1, and Tbus1 in Spreadsheet2. To circumvent problems like this, we can change the case to upper before we do the merge.

ss1$Type <- toupper(ss1$Type)
ss2$Type <- toupper(ss2$Type)
out <- merge(ss1, ss2, all.x=TRUE)
out
# Type Category NumItem TypeElement NumEngine
# 1 A380 Air 5 380T1 10
# 2 B700 Air 8 <NA> NA
# 3 B747 Air 10 747T1 2
# 4 B747 Air 10 747T2 4
# 5 B747 Air 10 747T3 8
# 6 B777 Air 20 777T1 6
# 7 B777 Air 20 777T2 4
# 8 TBUS1 Ground 15 TbusT1 0


Related Topics



Leave a reply



Submit