R Convert Zipcode or Lat/Long to County

R convert zipcode or lat/long to county

I ended up using the suggestion from Josh O'Brien mentioned above and found here.

I took his code and changed state to county as shown here:

library(sp)
library(maps)
library(maptools)

# The single argument to this function, pointsDF, is a data.frame in which:
# - column 1 contains the longitude in degrees (negative in the US)
# - column 2 contains the latitude in degrees

latlong2county <- function(pointsDF) {
  # Look up the US county containing each point.
  #
  # pointsDF: data.frame with longitude in column 1 and latitude in column 2
  #           (degrees).
  # Returns a character vector of polygon IDs such as "wisconsin,juneau",
  # one per row of pointsDF.

  # Points and polygons are both declared as lon/lat on the WGS84 datum.
  wgs84 <- CRS("+proj=longlat +datum=WGS84")

  # County outlines from the 'maps' database; anything after a ":" in a
  # polygon name is dropped so that every piece of a county shares one ID.
  county_map <- map("county", fill = TRUE, col = "transparent", plot = FALSE)
  county_ids <- sapply(strsplit(county_map$names, ":"),
                       function(parts) parts[1])
  county_polys <- map2SpatialPolygons(county_map, IDs = county_ids,
                                      proj4string = wgs84)

  # over() yields, for each point, the index of the polygon containing it
  point_layer <- SpatialPoints(pointsDF, proj4string = wgs84)
  hit <- over(point_layer, county_polys)

  # Translate polygon indices into their IDs
  polygon_ids <- sapply(county_polys@polygons, function(poly) poly@ID)
  polygon_ids[hit]
}

# Test the function using points in Wisconsin and Oregon.
testPoints <- data.frame(x = c(-90, -120), y = c(44, 44))

latlong2county(testPoints)
[1] "wisconsin,juneau" "oregon,crook" # IT WORKS

Zip Codes from Lat/Lon (batch query)

Using the Open Streetmap API, you could try

library(RCurl)
library(RJSONIO)

latlon2zip <- function(lat, lon) {
  # Reverse-geocode a latitude/longitude through the OpenStreetMap Nominatim
  # service and return the "postcode" entry of the reported address.
  endpoint <- "http://nominatim.openstreetmap.org/reverse?format=json&lat=%f&lon=%f&zoom=18&addressdetails=1"
  reply <- fromJSON(sprintf(endpoint, lat, lon))
  reply[["address"]][["postcode"]]
}

latlon2zip(lat=52.5487429714954, lon=-1.81602098644987)

In order to use the latlon2zip function in transform, use Vectorize.

Latitude Longitude Coordinates to State Code in R

Here are two options, one using sf and one using sp package functions. sf is the more modern (and, here in 2020, recommended) package for analyzing spatial data, but in case it's still useful, I am leaving my original 2012 answer showing how to do this with sp-related functions.


Method 1 (using sf):

library(sf)
library(spData)

## pointsDF: A data.frame whose first column contains longitudes and
##           whose second column contains latitudes (degrees, EPSG:4326).
##
## states:   An sf polygon layer with one feature per state
##           (defaults to spData::us_states).
##
## name_col: Name of a column in `states` that supplies the states'
##           names.
##
## Returns a vector with one element per row of `pointsDF`: the name of
## the state intersected by the point, or NA when the point lies in none.
## A point that intersects several polygons (for example one lying exactly
## on a shared border) is assigned the first matching state.
lonlat_to_state <- function(pointsDF,
                            states = spData::us_states,
                            name_col = "NAME") {
  ## Convert points data.frame to an sf POINTS object
  pts <- st_as_sf(pointsDF, coords = 1:2, crs = 4326)

  ## Transform spatial data to some planar coordinate system
  ## (e.g. Web Mercator) as required for geometric operations
  states <- st_transform(states, crs = 3857)
  pts <- st_transform(pts, crs = 3857)

  ## st_intersects() returns, per point, the indices of ALL intersecting
  ## polygons. Coercing that list with as.integer() fails as soon as one
  ## point has more than one hit, so reduce each element explicitly:
  ## first hit if any, NA otherwise.
  state_names <- states[[name_col]]
  hits <- st_intersects(pts, states)
  ii <- vapply(
    hits,
    function(idx) if (length(idx) > 0L) idx[1L] else NA_integer_,
    integer(1)
  )
  state_names[ii]
}

## Test the function with points in Wisconsin, Oregon, and France
testPoints <- data.frame(x = c(-90, -120, 0), y = c(44, 44, 44))
lonlat_to_state(testPoints)
## [1] "Wisconsin" "Oregon" NA

If you need higher resolution state boundaries, read in your own vector data as an sf object using sf::st_read() or by some other means. One nice option is to install the rnaturalearth package and use it to load a state vector layer from rnaturalearthhires. Then use the lonlat_to_state() function we just defined as shown here:

library(rnaturalearth)
us_states_ne <- ne_states(country = "United States of America",
returnclass = "sf")
lonlat_to_state(testPoints, states = us_states_ne, name_col = "name")
## [1] "Wisconsin" "Oregon" NA

For very accurate results, you can download a geopackage containing GADM-maintained administrative borders for the United States from this page. Then, load the state boundary data and use them like this:

USA_gadm <- st_read(dsn = "gadm36_USA.gpkg", layer = "gadm36_USA_1")
lonlat_to_state(testPoints, states = USA_gadm, name_col = "NAME_1")
## [1] "Wisconsin" "Oregon" NA

Method 2 (using sp):

Here is a function that takes a data.frame of longitude/latitude pairs (longitude in the first column, latitude in the second) within the lower 48 states, and for each point, returns the state in which it is located.

Most of the function simply prepares the SpatialPoints and SpatialPolygons objects needed by the over() function in the sp package, which does the real heavy lifting of calculating the 'intersection' of points and polygons:

library(sp)
library(maps)
library(maptools)

# The single argument to this function, pointsDF, is a data.frame in which:
# - column 1 contains the longitude in degrees (negative in the US)
# - column 2 contains the latitude in degrees

lonlat_to_state_sp <- function(pointsDF) {
  # Look up the US state containing each point.
  #
  # pointsDF: data.frame with longitude in column 1 and latitude in column 2
  #           (degrees).
  # Returns a character vector of polygon IDs such as "wisconsin", one per
  # row of pointsDF.

  # Points and polygons are both declared as lon/lat on the WGS84 datum.
  wgs84 <- CRS("+proj=longlat +datum=WGS84")

  # State outlines from the 'maps' database (plus DC, minus HI & AK);
  # anything after a ":" in a polygon name is dropped so that every piece
  # of a state shares one ID.
  state_map <- map("state", fill = TRUE, col = "transparent", plot = FALSE)
  state_ids <- sapply(strsplit(state_map$names, ":"),
                      function(parts) parts[1])
  state_polys <- map2SpatialPolygons(state_map, IDs = state_ids,
                                     proj4string = wgs84)

  # over() yields, for each point, the index of the polygon containing it
  point_layer <- SpatialPoints(pointsDF, proj4string = wgs84)
  hit <- over(point_layer, state_polys)

  # Translate polygon indices into their IDs
  polygon_ids <- sapply(state_polys@polygons, function(poly) poly@ID)
  polygon_ids[hit]
}

# Test the function using points in Wisconsin and Oregon.
testPoints <- data.frame(x = c(-90, -120), y = c(44, 44))

lonlat_to_state_sp(testPoints)
[1] "wisconsin" "oregon" # IT WORKS

Map zip codes to their respective city and state in R?

Answer updated

The zipcode package seems to have disappeared, so this answer has been updated to show how to add lat-lon from an external file. New answer at bottom.


You can get the data from the zipcode package and just do a merge to look things up.

# Zip codes to look up, wrapped in a one-column data frame for merging
zip <- c(
  "43031", "24517", "43224", "43832", "53022",
  "60185", "84104", "43081", "85226", "85193", "54656",
  "43215", "94533", "95826", "64804", "49548", "54467"
)
ZC <- data.frame(zip = zip)

# Load the lookup table shipped with the zipcode package and join on the
# shared 'zip' column
library(zipcode)
data(zipcode)
merge(ZC, zipcode)
zip city state latitude longitude
1 24517 Altavista VA 37.12754 -79.27409
2 43031 Johnstown OH 40.15198 -82.66944
3 43081 Westerville OH 40.10951 -82.91606
4 43215 Columbus OH 39.96513 -83.00431
5 43224 Columbus OH 40.03991 -82.96772
6 43832 Newcomerstown OH 40.27738 -81.59662
7 49548 Grand Rapids MI 42.86823 -85.66391
8 53022 Germantown WI 43.21916 -88.12043
9 54467 Plover WI 44.45228 -89.54399
10 54656 Sparta WI 43.96977 -90.80796
11 60185 West Chicago IL 41.89198 -88.20502
12 64804 Joplin MO 37.04716 -94.51124
13 84104 Salt Lake City UT 40.75063 -111.94077
14 85193 Casa Grande AZ 32.86000 -111.83000
15 85226 Chandler AZ 33.31221 -111.93177
16 94533 Fairfield CA 38.26958 -122.03701
17 95826 Sacramento CA 38.55010 -121.37492

If you need to keep the rows in the same order, you can just set the rownames on the zipcode data and use that to select the desired rows and columns.

# Key the lookup table by zip code, then index it with the input vector so
# the result follows the input order; keep only the first three columns
rownames(zipcode) <- zipcode$zip
zipcode[zip, seq_len(3)]
zip city state
43031 43031 Johnstown OH
24517 24517 Altavista VA
43224 43224 Columbus OH
43832 43832 Newcomerstown OH
53022 53022 Germantown WI
60185 60185 West Chicago IL
84104 84104 Salt Lake City UT
43081 43081 Westerville OH
85226 85226 Chandler AZ
85193 85193 Casa Grande AZ
54656 54656 Sparta WI
43215 43215 Columbus OH
94533 94533 Fairfield CA
95826 95826 Sacramento CA
64804 64804 Joplin MO
49548 49548 Grand Rapids MI
54467 54467 Plover WI


Updated Answer

Since the zipcode package has disappeared, this shows how to add lat-lon information from a downloaded data set. The file that I am using exists today but the method should work for other files. See the GIS StackExchange for some leads on where to download data.

## Original Data to match
zip <- c("43031", "24517", "43224", "43832", "53022",
         "60185", "84104", "43081", "85226", "85193", "54656",
         "43215", "94533", "95826", "64804", "49548", "54467")
ZC <- data.frame(zip)

## Download source file, unzip and extract into table
ZipCodeSourceFile <- "http://download.geonames.org/export/zip/US.zip"
temp <- tempfile()
## mode = "wb": the archive is binary and must not be newline-translated
download.file(ZipCodeSourceFile, temp, mode = "wb")
ZipCodeColumns <- c("CountryCode", "zip", "PlaceName",
                    "AdminName1", "AdminCode1", "AdminName2", "AdminCode2",
                    "AdminName3", "AdminCode3", "latitude", "longitude",
                    "accuracy")
## quote = "" / comment.char = "": the file is plain tab-separated text, so
##   apostrophes or '#' inside place names must not be treated as quoting
##   or comment markers.
## colClasses: read zip codes as text so leading zeros are preserved and
##   the column matches the character keys in ZC.
ZipCodes <- read.table(unz(temp, "US.txt"), sep = "\t",
                       quote = "", comment.char = "",
                       col.names = ZipCodeColumns,
                       colClasses = c(zip = "character"),
                       stringsAsFactors = FALSE)
unlink(temp)

## merge extra info onto original data
## (zip, place name, state name/code, county name, latitude, longitude)
ZC_Info <- merge(ZC, ZipCodes[, c(2:6, 10:11)])
head(ZC_Info)
zip PlaceName AdminName1 AdminCode1 AdminName2 latitude longitude
1 24517 Altavista Virginia VA Campbell 37.1222 -79.2911
2 43031 Johnstown Ohio OH Licking 40.1445 -82.6973
3 43081 Westerville Ohio OH Franklin 40.1146 -82.9105
4 43215 Columbus Ohio OH Franklin 39.9671 -83.0044
5 43224 Columbus Ohio OH Franklin 40.0425 -82.9689
6 43832 Newcomerstown Ohio OH Tuscarawas 40.2739 -81.5940

Fixing the dataset for zipcode to county conversion in R

You could use the following. But I'm not really sure how you want to decide whether you want the XX or the HH.

library(tidyverse)
zipcodeToCounty <- function(zip) {
  # Look up the county and state for a zip code in the global table
  # zipcodeToCountyData (columns used: zipcode, county, state).
  #
  # Returns an unnamed list: list(county, state).

  # First try an exact match on the full zip code
  hits <- subset(zipcodeToCountyData, zipcode == zip)

  if (nrow(hits) == 0) {
    # No exact match: fall back to the entries sharing the first three
    # characters and keep the one with the smallest zipcode
    same_prefix <- filter(
      zipcodeToCountyData,
      str_sub(zipcode, 1, 3) == str_sub(zip, 1, 3)
    )
    hits <- head(arrange(same_prefix, zipcode), 1) ## HH as first choice ??
  }

  list(hits$county, hits$state)
}

How to efficiently map lat/long pairs to zipcodes (without an API)

Here's a complete solution for you:

library(sp)
library(maptools)
library(zipcode)

# location of the ZCTA boundary shapefile archive and its local copy
url <- "http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_2014_us_zcta510_500k.zip"
fil <- "ztca.zip"

# only download when there is no local copy yet
if (!file.exists(fil)) download.file(url, fil)
unzip(fil, exdir = "ztca")

# load the polygons (this takes a bit)
ztca <- readShapePoly("ztca/cb_2014_us_zcta510_500k.shp", verbose = TRUE)

# keep only the polygons whose code is listed as a NY zip in 'zipcode'
ny <- ztca[
  as.character(ztca$ZCTA5CE10) %in%
    as.character(zipcode[zipcode$state == "NY", ]$zip),
]

# your points
latlongdata <- structure(
  list(
    dropoff_longitude = c(-73.981705, -73.993553, -73.973305, -73.988823,
                          -73.938484, -74.015503, -73.95472, -73.9571,
                          -73.971298, -73.99794),
    dropoff_latitude = c(40.760559, 40.756348, 40.762646, 40.777504,
                         40.684692, 40.709881, 40.783371, 40.776657,
                         40.752148, 40.720535)
  ),
  row.names = c(8807218L, 9760613L, 3175671L, 10878727L, 2025038L,
                5345659L, 14474481L, 1650223L, 684883L, 9129975L),
  class = "data.frame",
  .Names = c("dropoff_longitude", "dropoff_latitude")
)

# turn the coordinates into spatial points (must be in lon,lat order)
pts <- SpatialPoints(as.matrix(latlongdata[, 1:2]))

# attributes of the polygon containing each point (this can take a bit)
dat <- over(pts, ny)

# carry the original coordinates and row names over to the result
dat$lon <- latlongdata$dropoff_longitude
dat$lat <- latlongdata$dropoff_latitude
rownames(dat) <- rownames(latlongdata)

# boom
dat
## ZCTA5CE10 AFFGEOID10 GEOID10 ALAND10 AWATER10 lon lat
## 8807218 10019 8600000US10019 10019 1780742 0 -73.98171 40.76056
## 9760613 10018 8600000US10018 10018 836253 0 -73.99355 40.75635
## 3175671 10022 8600000US10022 10022 1107169 0 -73.97330 40.76265
## 10878727 10069 8600000US10069 10069 249044 0 -73.98882 40.77750
## 2025038 11221 8600000US11221 11221 3582803 0 -73.93848 40.68469
## 5345659 10280 8600000US10280 10280 300652 38759 -74.01550 40.70988
## 14474481 10128 8600000US10128 10128 1206195 0 -73.95472 40.78337
## 1650223 10028 8600000US10028 10028 811363 0 -73.95710 40.77666
## 684883 10017 8600000US10017 10017 820953 0 -73.97130 40.75215
## 9129975 10013 8600000US10013 10013 1425085 0 -73.99794 40.72053


Related Topics



Leave a reply



Submit