Vlookup Type Method in R

How to do vlookup and fill down (like in Excel) in R?

If I understand your question correctly, here are four methods to do the equivalent of Excel's VLOOKUP and fill down using R:

# load sample data from Q
hous <- read.table(header = TRUE, 
                   stringsAsFactors = FALSE, 
text="HouseType HouseTypeNo
Semi            1
Single          2
Row             3
Single          2
Apartment       4
Apartment       4
Row             3")

# create a toy large table with a 'HouseType' column 
# but no 'HouseTypeNo' column (yet)
largetable <- data.frame(HouseType = as.character(sample(unique(hous$HouseType), 1000, replace = TRUE)), stringsAsFactors = FALSE)

# create a lookup table to get the numbers to fill
# the large table
lookup <- unique(hous)
  HouseType HouseTypeNo
1      Semi           1
2    Single           2
3       Row           3
5 Apartment           4

Here are four methods to fill the HouseTypeNo in the largetable using the values in the lookup table:

First with merge in base:

# 1. using base 
base1 <- (merge(lookup, largetable, by = 'HouseType'))

A second method with named vectors in base:

# 2. using base and a named vector
housenames <- as.numeric(1:length(unique(hous$HouseType)))
names(housenames) <- unique(hous$HouseType)

base2 <- data.frame(HouseType = largetable$HouseType,
                    HouseTypeNo = (housenames[largetable$HouseType]))

Third, using the plyr package:

# 3. using the plyr package
library(plyr)
plyr1 <- join(largetable, lookup, by = "HouseType")

Fourth, using the sqldf package

# 4. using the sqldf package
library(sqldf)
sqldf1 <- sqldf("SELECT largetable.HouseType, lookup.HouseTypeNo
FROM largetable
INNER JOIN lookup
ON largetable.HouseType = lookup.HouseType")

If it's possible that some house types in largetable do not exist in lookup then a left join would be used:

sqldf("select * from largetable left join lookup using (HouseType)")

Corresponding changes to the other solutions would be needed too.

Is that what you wanted to do? Let me know which method you like and I'll add commentary.

VLookup type method in R

merge them:

> merge(full_list, replace, by.x="Ticker", by.y="Symbol")
  Ticker                           Long_Name Type Location Month
1     AC                     Ethanol -- CBOT    F       US   U13
2    AIC      DJ UBS Commodity Index -- CBOT    F       US   U13
3    BBS     South American Soybeans -- CBOT    F       US   U13
4     BO                  Soybean Oil -- CBT    F       US   V13
5      C                         Corn -- CBT    F       US   U13
6     DF Dow Jones Industrial Average -- CBT    F       US   U13

Replace/Vlookup type function in R

You could try match

df1[-1] <- `dim<-`(df2$amount[match(as.matrix(df1[-1]),
                                  df2$letters)], c(4,3))

df1[is.na(df1)] <- '-' 
#wouldn't recommend to replace `NA` as it will convert to `character` class
df1
#  ID value_1 value_2 value_3
#1  1       5       -       6
#2  2       5      20       5
#3  3      20       5      20
#4  4       -       -       -

Or an option using mutate_each from dplyr

library(dplyr)
df1 %>% 
    mutate_each(funs(df2$amount[match(., df2$letters)]), -ID)
#    ID value_1 value_2 value_3
#1  1       5      NA       6
#2  2       5      20       5
#3  3      20       5      20
#4  4      NA      NA      NA

Equivalent of a Vlookup in R

In dplyr, we can select only the rows we're interested in in df2, then filter it to contain only distinct rows before joining it (left or right doesn't matter here) to `df1.

library(dplyr)
df2 %>%
    select(unique_id, amount_due) %>%
    distinct() %>%
    right_join(df1, by = 'unique_id')

  unique_id amount_due df1 df2    df3 df4
1      1234       $100   1   h 8/4/18  no
2      2341         $1   2  nl 8/5/18 yes
3      3412       $200   3  sg 8/3/18  no
4      4213      $2.22   4  hi 7/3/18 yes

Using Merge in R to replicate Vlookup

A dplyr solution.

library(dplyr)
dbase <- data.frame(machine_number = c("10","20","30","10","10","50"),
                second_attribute=c("a","b","c","c","a","d"))
mbase <- data.frame(machine_number = c("10","20","30","40","50","60","70","80","90","100"),
                    rate=c(22,22,25,17,15,15,55,12,15,19))

left_join(dbase, mbase, by = "machine_number") 

  machine_number second_attribute rate
1             10                a   22
2             20                b   22
3             30                c   25
4             10                c   22
5             10                a   22
6             50                d   15

Matching values in a column using R - excel vlookup

We can just use match to find the index of those matching elements of 'y' with that of 'x' and use that to index to get the corresponding 'z'

dt[, Result1 := z[match(y,x)]]
dt
#   x y z Result Result1
#1: 1 6 a     na      NA
#2: 2 5 b      e       e
#3: 3 4 c      d       d
#4: 4 3 d      c       c
#5: 5 2 e      b       b
#6: 5 1 f      a       a

How to do Vlookup using R for range data

This is somewhat dependent on what format your data is in after being read from Excel, but if it is in the format implied by your question, then you can do:

analysis$source_cost <- source_data$COST[
  findInterval(analysis$distance_travel,
               sapply(strsplit(source_data$DISTANCE, '-'), 
                               function(x) as.numeric(x[1])))]

analysis$Difference <- analysis$total_cost - analysis$source_cost

analysis
#>    loading_station distance_travel total_cost status source_cost Difference
#> 1             PUGU              40       4000   PAID        4800       -800
#> 2             PUGU              80       3200   PAID        4800      -1600
#> 3         MOROGORO              50       5000   PAID        4800        200
#> 4         MOROGORO             220      30400   PAID        6900      23500
#> 5           DODOMA             150       5100   PAID        5900       -800
#> 6           KIGOMA              90       2345   PAID        4800      -2455
#> 7           DODOMA             230       6000   PAID        7200      -1200
#> 8           DODOMA             180      16500   PAID        6200      10300
#> 9           KIGOMA              32       3000   PAID        4800      -1800
#> 10          DODOMA              45       6000   PAID        4800       1200
#> 11          DODOMA              65       5000   PAID        4800        200
#> 12          KIGOMA              77       1000   PAID        4800      -3800
#> 13          KIGOMA              90       4000   PAID        4800       -800

Data from question in reproducible format

source_data <- structure(list(DISTANCE = c("1-100", "101-120", "121-140", 
"141-160", "161-180", "181-200", "210-220", "221-240"), COST = c(4800L, 
5100L, 5500L, 5900L, 6200L, 6600L, 6900L, 7200L)), 
class = "data.frame", row.names = c(NA, -8L))

analysis <- structure(list(loading_station = c("PUGU", "PUGU", "MOROGORO", 
"MOROGORO", "DODOMA", "KIGOMA", "DODOMA", "DODOMA", "KIGOMA", 
"DODOMA", "DODOMA", "KIGOMA", "KIGOMA"), distance_travel = c(40L, 
80L, 50L, 220L, 150L, 90L, 230L, 180L, 32L, 45L, 65L, 77L, 90L
), total_cost = c(4000L, 3200L, 5000L, 30400L, 5100L, 2345L, 
6000L, 16500L, 3000L, 6000L, 5000L, 1000L, 4000L), status = c("PAID", 
"PAID", "PAID", "PAID", "PAID", "PAID", "PAID", "PAID", "PAID", 
"PAID", "PAID", "PAID", "PAID")), class = "data.frame", row.names = c(NA, 
-13L))

^{Created on 2022-08-14 by the reprex package (v2.0.1)}

Excel vlookup multiple values and add duplicate rows

As @r-schifini mentioned, there are several libraries you can use to import Excel files. Here I use the readxl package. To retain all rows from the first spreadsheet -- your Spreadsheet1 -- specify all.x=TRUE in the merge function. See ?merge for more details. Note that I've added one more row in Spreadsheet1 with fake data for type B700.

library(readxl)
ss1 <- read_excel(path = "spreadsheet1.xlsx", sheet = 1)
ss2 <- read_excel(path = "spreadsheet2.xlsx", sheet = 1)

out <- merge(ss1, ss2, all.x=TRUE)
out
#    Type Category NumItem TypeElement NumEngine
# 1  A380      Air       5       380T1        10
# 2  B700      Air       8        <NA>        NA
# 3  B747      Air      10       747T1         2
# 4  B747      Air      10       747T2         4
# 5  B747      Air      10       747T3         8
# 6  B777      Air      20       777T1         6
# 7  B777      Air      20       777T2         4
# 8 TBus1   Ground      15        <NA>        NA

Why do we have NAs in row 8? It's because your type is TBus1 in Spreadsheet1, and Tbus1 in Spreadsheet2. To circumvent problems like this, we can change the case to upper before we do the merge.

ss1$Type <- toupper(ss1$Type)
ss2$Type <- toupper(ss2$Type)
out <- merge(ss1, ss2, all.x=TRUE)
out
#    Type Category NumItem TypeElement NumEngine
# 1  A380      Air       5       380T1        10
# 2  B700      Air       8        <NA>        NA
# 3  B747      Air      10       747T1         2
# 4  B747      Air      10       747T2         4
# 5  B747      Air      10       747T3         8
# 6  B777      Air      20       777T1         6
# 7  B777      Air      20       777T2         4
# 8 TBUS1   Ground      15      TbusT1         0