Get Continent Name from Country Name in R

Get continent name from country name in R

You can use the countrycode package for this task.

library(countrycode)

# Sample country names; the last one is deliberately not a real country.
df <- data.frame(
  country = c(
    "Afghanistan", "Algeria", "USA",
    "France", "New Zealand", "Fantasyland"
  )
)

# Translate each country name into its continent. Names that cannot be
# matched produce a warning and an NA entry in the new column.
df$continent <- countrycode(
  df[["country"]],
  origin = "country.name",
  destination = "continent"
)
# Warning:
#   Some values were not matched unambiguously: Fantasyland

Result

df
# country continent
#1 Afghanistan Asia
#2 Algeria Africa
#3 USA Americas
#4 France Europe
#5 New Zealand Oceania
#6 Fantasyland <NA>

Is there a package/function in R that will identify country/continent?

Ok, both are possible:

library(dplyr)
library(countrycode)

# Free-text records whose last comma-separated field is the country name.
df <- c("random,thing,thing, United States", "site, level, state, information, Sweden")

# Drop everything up to and including the last ", " so only the country
# remains. tibble() replaces the deprecated data_frame() constructor.
df2 <- tibble(Country = sub(".*\\, ", "", df))

# Look up the continent for each extracted country name.
df2$Continent <- countrycode(
  sourcevar = df2$Country,
  origin = "country.name",
  destination = "continent"
)
df2

countrycode in R - NA's returned when detecting country name in df

Kindly let me know if this is what you were anticipating.

# Add a column derived from the `country` column of the `country` data frame.
# NOTE(review): the new column is named `continent` but the requested
# destination is "region" -- confirm which of the two is actually wanted.
mutate(
  country,
  continent = countrycode(country, origin = "country.name", destination = "region")
)

Fast way to parse vector of continent / country / city in R

Consider using read.table from base R

# Parse the "continent / country / city" strings in `x` as "/"-separated
# records: surrounding blanks are stripped, short records are padded, and
# empty fields are read as NA.
read.table(
  text = x,
  header = FALSE,
  sep = "/",
  strip.white = TRUE,
  fill = TRUE,
  na.strings = ""
)
V1 V2 V3
1 Africa Kenya Nairobi
2 Africa Kenya Nairobi
3 Africa Kenya <NA>

Or using fread from data.table

library(data.table)

# header = FALSE is required here: every field is character, so fread's
# automatic header detection would otherwise consume the first record as
# column names and silently return one row too few.
fread(
  text = paste(x, collapse = "\n"),
  sep = "/",
  header = FALSE,
  fill = TRUE,
  na.strings = ""
)
# Expected result (one row per element of x, default column names):
#        V1    V2      V3
# 1: Africa Kenya Nairobi
# 2: Africa Kenya Nairobi
# 3: Africa Kenya    <NA>

Benchmarks

x <- rep("Africa / Kenya / Nairobi", 1000000)
>
> system.time(fread(text = paste(x, collapse="\n"), sep="/", fill = TRUE, na.strings = ""))
user system elapsed
0.473 0.024 0.496

> system.time(read.table(text = x, sep = "/", header = FALSE,
+ fill = TRUE, strip.white = TRUE, na.strings = ""))
user system elapsed
0.519 0.026 0.543

> system.time({ #Using data.table
+ y <- do.call(cbind, data.table::tstrsplit(x, "/", TRUE))
+ y <- trimws(y, whitespace = " ")
+ })
user system elapsed
2.035 0.051 2.067

data

# Sample input: with and without blanks around "/", and one record that
# has no city component.
x <- c(
  "Africa / Kenya / Nairobi",
  "Africa/Kenya/Nairobi",
  "Africa / Kenya"
)

Is there a function in R that lets me create a new column with (1) the full country names and (2) the respective continent?

Are you looking for

library(countrycode)

# One row per ISO3 code in `mycodes`, with the matching country name and
# continent looked up via countrycode().
df <- data.frame(
  country_code = mycodes,
  country_name = countrycode(
    mycodes,
    origin = "iso3c",
    destination = "country.name"
  ),
  continent = countrycode(
    mycodes,
    origin = "iso3c",
    destination = "continent"
  )
)

head(df)

?

Note that countrycode() cannot match every "iso3c" country code unambiguously. The warnings show which codes could not be matched:

#> Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: TMP
#> Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: ATA, ATF, TMP, UMI

Btw, I see no need to use {dplyr} here.

Data



# Three-letter country codes used as `sourcevar` with origin = "iso3c".
# "EGY" was previously misspelled "EGy"; codes are written in upper case
# so the country_code column shows the proper form.
mycodes <- c("PRT", "FRA", "JPN", "IRL", "ESP", "BEL", "AUT", "DEU", "ITA", "CHN", "RUS", "POL", "USA", "CRI", "CHE", "ROU", "GBR",
"BRA", "FIN", "NLD", "CAN", "ZAF", "AUS", "AGO", "BGR", "SWE", "CYP", "ARG", "ARM", "CHL", "MOZ", "KOR", "TUN", "DNK", "GRC",
"NOR", "ISR", "MYS", "EGY", "JOR", "LUX", "TUR", "IRN", "LBY", "PAN", "COL", "VEN", "DZA", "GNB", "MAR", "CZE", "SVN", "IND",
"HUN", "NZL", "PER", "LTU", "TWN", "SRB", "EST", "KAZ", "KWT", "IDN", "UKR", "MEX", "SVK", "SAU", "ARE", "BGD", "THA", "TZA",
"LVA", "PHL", "BIH", "BHR", "NAM", "BOL", "HRV", "SGP", "CMR", "MLT", "URY", "PAK", "JAM", "ECU", "SYC", "QAT", "PRY", "BRB",
"OMN", "TMP", "ABW", "LBN", "SLV", "DMA", "CUB", "VNM", "GEO", "IRQ", "PYF", "UGA", "LIE", "SEN", "BLR", "ISL", "DOM",
"GUY", "LCA", "CPV", "ATA", "GAB", "NGA", "RWA", "CIV", "ALB", "MKD", "MNE", "GTM", "GHA", "MDV", "MCO", "MUS", "TGO", "LKA",
"AZE", "SUR", "KEN", "MRT", "HKG", "SYR", "CAF", "NCL", "UZB", "KIR", "SDN", "PRI", "ATF", "KNA", "TJK", "SLE", "LAO", "COM",
"ETH", "FRO", "AND", "BEN", "ZWE", "ASM", "MLI", "BWA", "AIA", "COD", "SPM", "JEY", "MDG", "NIC", "SWZ", "CYM", "SOM", "ATG",
"KGZ", "FLK", "GIB", "SMR", "TKM", "HTI", "UMI", "MMR", "WSM", "VIR", "ERI", "WLF", "GUF", "MWI", "PCN", "TCD")

How can I change some of the names of variables within columns?

With case_when you can easily extend this by adding more conditions:

library(dplyr)

# Recode Continent for specific countries; rows matching no condition keep
# their existing Continent value via the TRUE fallback.
df %>%
  mutate(Continent = case_when(
    Country == "Afghanistan" ~ "Asia",
    Country == "Australia" ~ "Oceania",
    TRUE ~ Continent
  ))

#       Country Year Continent Life_Expectancy
# 1 Afghanistan 2010      Asia        61.17996
# 2 Afghanistan 2011      Asia        61.72234
# 3 Afghanistan 2012      Asia        62.20652
# 4   Australia 2012   Oceania        43.22200

data:

# Example data: life expectancy by country and year, with the original
# (to-be-recoded) Continent labels.
df <- structure(
  list(
    Country = rep(c("Afghanistan", "Australia"), times = c(3L, 1L)),
    Year = c(2010L, 2011L, 2012L, 2012L),
    Continent = rep(
      c("Eastern Mediterranean", "Western pacific"),
      times = c(3L, 1L)
    ),
    Life_Expectancy = c(61.17996, 61.72234, 62.20652, 43.222)
  ),
  class = "data.frame",
  row.names = c("1", "2", "3", "4")
)


Related Topics



Leave a reply



Submit