Tidyr: Multiple Unnesting with Varying NA Counts

tidyr::unnest() with different column types

You can convert all relevant columns to character in one step before unnesting.

# Coerce `char_vec` to character inside every nested tibble so all pieces
# share one column type, then unnest the list-column.
tibble(file = list(a, b, c)) %>%
  mutate(
    file = map(
      file,
      function(tbl) mutate(tbl, char_vec = as.character(char_vec))
    )
  ) %>%
  unnest(cols = c(file))

If there are several columns that need treatment you can do:

 # Convert every column whose name starts with "char" to character in each
 # nested tibble, then unnest. `across()` replaces the superseded
 # `mutate_at()`; the final `unnest()` mirrors the single-column example.
 tibble(file = list(a, b, c)) %>%
   mutate(
     file = map(
       file,
       ~ mutate(.x, across(starts_with("char"), as.character))
     )
   ) %>%
   unnest(cols = c(file))

Data for the latter example:

# Three small tibbles whose character columns hold different numbers of NAs.
# A column built only from NA is logical, not character, which is what makes
# the pieces incompatible when unnesting.
a <- tibble(
  value = rnorm(3),
  char_vec = c(NA, "A", NA),
  char_vec2 = rep(NA, 3)
)

b <- tibble(
  value = rnorm(2),
  char_vec = c(NA, "B"),
  char_vec2 = c("C", "A")
)

c <- tibble(
  value = rnorm(3),
  char_vec = rep(NA, 3),
  char_vec2 = c("B", NA, "A")
)

How to unnest multiple list columns of a dataframe in one go with dplyr pipe

There's probably a cleaner way to do it, but if you want the cartesian product for the columns you can unnest them in sequence, if nothing else:

# Unnest one list-column at a time to get the cartesian product of `a` and
# `b` within each row. tidyr >= 1.0.0 keeps the remaining list-columns by
# default, so the deprecated `.drop = FALSE` argument is no longer needed.
df %>%
  unnest(cols = a) %>%
  unnest(cols = b)

# # A tibble: 7 x 3
# c a b
# <dbl> <chr> <chr>
# 1 11 a 1
# 2 11 a 2
# 3 11 a 3
# 4 11 b 1
# 5 11 b 2
# 6 11 b 3
# 7 22 c 3

tidyr unnest, prefix column names with nested name during unnesting

The answer turned out to be fairly simple: use the names_sep option rather than the names_repair option. Quoting the names_sep entry on the nest() help page:

If a string, the inner and outer names will be used together. In
nest(), the names of the new outer columns will be formed by pasting
together the outer and the inner column names, separated by names_sep.
In unnest(), the new inner names will have the outer names (+
names_sep) automatically stripped. This makes names_sep roughly
symmetric between nesting and unnesting.


library(dplyr, warn.conflicts = FALSE)

# Summarise a numeric vector as a named vector holding its mean (`mn`) and
# standard deviation (`sd`).
msd_c <- function(x) {
  avg <- mean(x)
  spread <- sd(x)
  c(mn = avg, sd = spread)
}
# One-row tibble version of `msd_c()` with columns `mn` and `sd`. Reuses
# `msd_c()` so the two summaries cannot drift apart.
msd_df <- function(x) bind_rows(msd_c(x))

# Per-species summaries: nest the petal measurements, build a one-row summary
# tibble for each measure plus the tidied correlation test, then unnest all
# three at once. `names_sep` prefixes each inner name with its outer name.
iris %>%
  select(Petal.Length:Species) %>%
  group_by(Species) %>%
  tidyr::nest() %>%
  mutate(
    Petal.Length = purrr::map(data, function(d) msd_df(d$Petal.Length)),
    Petal.Width = purrr::map(data, function(d) msd_df(d$Petal.Width)),
    Correlation = purrr::map(
      data,
      function(d) broom::tidy(cor.test(d$Petal.Length, d$Petal.Width))
    )
  ) %>%
  select(-data) %>%
  tidyr::unnest(c(Petal.Length, Petal.Width, Correlation), names_sep = ".")
#> # A tibble: 3 x 13
#> # Groups: Species [3]
#> Species Petal.Length.mn Petal.Length.sd Petal.Width.mn Petal.Width.sd
#> <fct> <dbl> <dbl> <dbl> <dbl>
#> 1 setosa 1.46 0.174 0.246 0.105
#> 2 versic~ 4.26 0.470 1.33 0.198
#> 3 virgin~ 5.55 0.552 2.03 0.275
#> # ... with 8 more variables: Correlation.estimate <dbl>,
#> # Correlation.statistic <dbl>, Correlation.p.value <dbl>,
#> # Correlation.parameter <int>, Correlation.conf.low <dbl>,
#> # Correlation.conf.high <dbl>, Correlation.method <chr>,
#> # Correlation.alternative <chr>

Created on 2020-06-10 by the reprex package (v0.3.0)

unnest_auto and unnest_longer to unnest multiple columns

Here, since you're unnesting a two dimensional structure (i.e. you want to change both the rows and columns), you can just use unnest:

library(tidyr)

df <- as_tibble(df)
df
#> # A tibble: 4 × 3
#> `_id` variable myDate
#> <chr> <list> <dttm>
#> 1 a <df [7 × 4]> 2018-03-19 18:48:31
#> 2 b <df [2 × 4]> 2018-03-19 21:46:51
#> 3 c <df [1 × 3]> 2019-03-04 05:39:44
#> 4 d <df [1 × 7]> 2019-03-26 15:38:13

# A single unnest() call expands the data-frame list-column into both new
# rows and new columns.
df |>
  unnest(cols = variable)
#> # A tibble: 11 × 10
#> `_id` type m_ t_ a_ b_ i_ e_ l_ myDate
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <df[> <chr> <dttm>
#> 1 a u m1 2015-07-… <NA> <NA> <NA> <NA> 2018-03-19 18:48:31
#> 2 a a m2 2016-04-… "" <NA> <NA> <NA> 2018-03-19 18:48:31
#> 3 a u m3 2017-10-… <NA> <NA> <NA> <NA> 2018-03-19 18:48:31
#> 4 a a m4 2018-12-… "" <NA> <NA> <NA> 2018-03-19 18:48:31
#> 5 a u m5 2019-04-… <NA> <NA> <NA> <NA> 2018-03-19 18:48:31
#> 6 a a m6 2016-05-… "C" <NA> <NA> <NA> 2018-03-19 18:48:31
#> 7 a a m7 2016-06-… "C" <NA> <NA> <NA> 2018-03-19 18:48:31
#> 8 b u m1 2018-05-… <NA> <NA> <NA> <NA> 2018-03-19 21:46:51
#> 9 b a m2 2019-04-… "" <NA> <NA> <NA> 2018-03-19 21:46:51
#> 10 c u m1 2018-02-… <NA> <NA> <NA> <NA> 2019-03-04 05:39:44
#> 11 d u m1 2016-05-… <NA> b1 i1 l1 2019-03-26 15:38:13

If you did want to do it in two steps, you could take advantage of the fact that unnest_longer() now takes a tidyselect specification:

# Two-step alternative: spread the nested columns wide first, then lengthen
# the columns selected by the tidyselect range `type:a_`.
df |>
  unnest_wider(col = variable) |>
  unnest_longer(col = type:a_)
#> # A tibble: 11 × 10
#> `_id` type m_ t_ a_ b_ i_ e_ l_ myDate
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <df[> <chr> <dttm>
#> 1 a u m1 2015-07-… <NA> <NA> <NA> <NA> 2018-03-19 18:48:31
#> 2 a a m2 2016-04-… "" <NA> <NA> <NA> 2018-03-19 18:48:31
#> 3 a u m3 2017-10-… <NA> <NA> <NA> <NA> 2018-03-19 18:48:31
#> 4 a a m4 2018-12-… "" <NA> <NA> <NA> 2018-03-19 18:48:31
#> 5 a u m5 2019-04-… <NA> <NA> <NA> <NA> 2018-03-19 18:48:31
#> 6 a a m6 2016-05-… "C" <NA> <NA> <NA> 2018-03-19 18:48:31
#> 7 a a m7 2016-06-… "C" <NA> <NA> <NA> 2018-03-19 18:48:31
#> 8 b u m1 2018-05-… <NA> <NA> <NA> <NA> 2018-03-19 21:46:51
#> 9 b a m2 2019-04-… "" <NA> <NA> <NA> 2018-03-19 21:46:51
#> 10 c u m1 2018-02-… <NA> <NA> <NA> <NA> 2019-03-04 05:39:44
#> 11 d u m1 2016-05-… <NA> b1 i1 l1 2019-03-26 15:38:13

Unnest a data frame and fill new rows with NAs

Repeat the outer rows (filling the repeats with NA) and column-bind them to the unnested list-column(s):

# Number of extra rows each outer row needs: nested row count minus one.
nr <- sapply(df$nestdf, nrow) - 1
cbind(
# Row index for x/y: rbind() interleaves each row number with NA, and the
# matching `times` vector repeats the row number once and the NA `nr` times.
# Indexing with NA yields an all-NA row, so x/y appear only on the first row
# of each group.
df[rep(rbind(seq_along(nr), NA), rbind(1, nr)), c("x","y")],
# Unnest only the nested column and bind it column-wise to the rows above.
unnest(df["nestdf"], cols=everything())
)

# x y a b
#1 1 2 1 3
#2 NA NA 2 4
#3 3 4 3 5
#4 NA NA 4 6
#5 NA NA 5 7

Unnest a list-column of tibbles with different data type (cannot combine double and character)

Maybe the following?

library(tidyverse)

# Two nested tibbles whose `b` column differs in type (double vs. character).
df <- tribble(
  ~x, ~y,
  1, tibble(a = 1, b = 2),
  2, tibble(a = 4:5, b = c("thank", "you"), c = 1:2)
)

# Cast `b` to character in every nested tibble before unnesting.
df %>%
  mutate(y = map(y, function(nested) mutate(nested, b = as.character(b)))) %>%
  unnest(cols = c(y))

#> # A tibble: 3 × 4
#> x a b c
#> <dbl> <dbl> <chr> <int>
#> 1 1 1 2 NA
#> 2 2 4 thank 1
#> 3 2 5 you 2

unnest list of lists of different lengths to dataframe

Would this work for you?

library(jsonlite)
library(tidyverse)
# Download the World Bank document search results as a nested list.
# Uses `<-` for assignment; the request URL is unchanged.
data <- fromJSON("http://search.worldbank.org/api/v2/wds?format=json&fl=abstracts,admreg,alt_title,authr,available_in,bdmdt,chronical_docm_id,closedt,colti,count,credit_no,disclosure_date,disclosure_type,disclosure_type_date,disclstat,display_title,docdt,docm_id,docna,docty,dois,entityid,envcat,geo_reg,geo_reg,geo_reg_and_mdk,guid,historic_topic,id,isbn,ispublicdocs,issn,keywd,lang,listing_relative_url,lndinstr,loan_no,majdocty,majtheme,ml_abstract,ml_display_title,new_url,owner,pdfurl,prdln,projectid,projn,publishtoextweb_dt,repnb,repnme,seccl,sectr,src_cit,subsc,subtopic,teratopic,theme,topic,topicv3,totvolnb,trustfund,txturl,unregnbr,url_friendly_title,versiontyp,versiontyp_key,virt_coll,vol_title,volnb&str_docdt=1986-01-01&end_docdt=2000-12-31&rows=500&os=1&srt=docdt&order=desc")

# Reshape the list of documents into a tibble with one column per field.
df <-
  data$documents %>%
  head(-1) %>% # drop the trailing facet element
  transpose() %>% # flip the list so each field becomes a top-level element
  as_tibble() %>% # one column per field
  # swap empty entries for list(NA) so every entry has length 1 or more
  purrr::modify(function(col) replace(col, lengths(col) == 0, list(NA))) %>%
  # flatten every column whose entries all have length 1
  modify_if(function(col) all(lengths(col) == 1), unlist)

Only one list column remains:

# Names of the columns that are still lists. Testing with is.list() avoids
# map_chr(df, class), which errors as soon as any column carries more than
# one class. NOTE(review): is.list() would also match data-frame columns;
# none are expected here.
names(df)[map_lgl(df, is.list)]
# [1] "keywd"

It contains items of length 1 or 2:

table(lengths(df$keywd))
# 1 2
# 224 276

Here's what the output looks like:

glimpse(df)

# Observations: 500
# Variables: 38
# $ url <chr> "http://documents.worldbank.org/curated/en/903231468764970044/Attacking-rural-poverty-strategy-and-public-actions", "...
# $ available_in <chr> "English", "English", "English", "English", "English", "English,French,Spanish,Portuguese", "Portuguese,Chinese,Engli...
# $ url_friendly_title <chr> "http://documents.worldbank.org/curated/en/903231468764970044/Attacking-rural-poverty-strategy-and-public-actions", "...
# $ new_url <chr> "2000/12/1000476/Attacking-rural-poverty-strategy-and-public-actions", "2000/12/1000501/State-policies-and-womens-aut...
# $ guid <chr> "903231468764970044", "429001468753367328", "985531468746683502", "890081468757236671", "922151468776107524", "324581...
# $ disclosure_date <chr> "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z...
# $ disclosure_type <chr> "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA...
# $ disclosure_type_date <chr> "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z...
# $ publishtoextweb_dt <chr> "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z", "2010-07-01T00:00:00Z...
# $ docm_id <chr> "090224b0828c737a", "090224b0828ac316", "090224b0828bd3f7", "090224b0828ac343", "090224b0828cf43d", "090224b0828cf42b...
# $ chronical_docm_id <chr> "090224b0828c737a", "090224b0828ac316", "090224b0828bd3f7", "090224b0828ac343", "090224b0828cf43d", "090224b0828cf42b...
# $ txturl <chr> "http://documents.worldbank.org/curated/en/903231468764970044/text/multi-page.txt", "http://documents.worldbank.org/c...
# $ pdfurl <chr> "http://documents.worldbank.org/curated/en/903231468764970044/pdf/multi-page.pdf", "http://documents.worldbank.org/cu...
# $ docdt <chr> "2000-12-31T00:00:00Z", "2000-12-31T00:00:00Z", "2000-12-31T00:00:00Z", "2000-12-31T00:00:00Z", "2000-12-31T00:00:00Z...
# $ totvolnb <chr> "1", "1", "1", "1", "5", "1", "1", "14", "1", "1", "1", "1", "14", "14", "14", "14", "14", "14", "14", "14", "14", "1...
# $ versiontyp <chr> "Final", "Final", "Final", "Final", "Final", "Final", "Final", "Final", "Final", "Final", "Final", "Final", "Final", ...
# $ versiontyp_key <chr> "1309935", "1309935", "1309935", "1309935", "1309935", "1309935", "1309935", "1309935", "1309935", "1309935", "130993...
# $ volnb <chr> "1", "1", "1", "1", "4", "1", "1", "8", "1", "1", "1", "1", "13", "4", "9", "12", "3", "2", "7", "10", "1", "6", "11"...
# $ repnme <chr> "Attacking rural poverty : strategy and\n public actions", "State policies and women's autonomy in\n ...
# $ abstracts <chr> "Poverty remains pervasive, and its\n incidence and intensity are usually higher in rural than in\n ...
# $ display_title <chr> "Attacking rural poverty :\n strategy and public actions", "State policies and women's\n autono...
# $ listing_relative_url <chr> "/research/2000/12/1000476/attacking-rural-poverty-strategy-public-actions", "/research/2000/12/1000501/state-policie...
# $ docty <chr> "Newsletter", "Working Paper (Numbered Series)", "Publication", "Poverty Reduction Strategy Paper (PRSP)", "Environme...
# $ subtopic <chr> "Economic Theory & Research,Rural Settlements,Industrial Economics,Nutrition,Educational Sciences,Economic Growth,Agr...
# $ docna <chr> "Attacking rural poverty : strategy and\n public actions", "State policies and women's autonomy in\n ...
# $ teratopic <chr> "Poverty Reduction", "Education", "Energy", "Poverty Reduction", "Industry,Transport,Water Resources", NA, "Governanc...
# $ authors <chr> "Okidegbe, Nwanze", "Zhang, Xiaodan", "Bogach, V. Susan", NA, "Carl Brothers International Inc.", "World Bank", "Mann...
# $ entityids <chr> "000094946_01022305364180", "000094946_01022705322025", "000094946_01011005520622", "000094946_0102240538258", "00009...
# $ subsc <chr> "Macro/Non-Trade", "Human Development", "(Historic)Other power and energy conversion", "(Historic)Macro/non-trade", "...
# $ lang <chr> "English", "English", "English", "English", "English", "Portuguese", "English", "English", "Chinese", "English", "Eng...
# $ historic_topic <chr> "Poverty Reduction", "Education", "Energy", "Poverty Reduction", "Industry,Transport,Water Resources", NA, "Governanc...
# $ seccl <chr> "Public", "Public", "Public", "Public", "Public", "Public", "Public", "Public", "Public", "Public", "Public", "Public...
# $ sectr <chr> "(Historic)Economic Policy", "(Historic)Multisector", "(Historic)Electric Power & Other Energy", "(Historic)Economic ...
# $ majdocty <chr> "Publications & Research", "Publications & Research", "Publications,Publications & Research", "Country Focus", "Proje...
# $ src_cit <chr> "Rural development note. -- No. 6 (December 2000)", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
# $ keywd <list> [[["Rural Poor;medium term expenditure\n framework;rural poverty reduction strategy;rural\n ar...
# $ owner <chr> "Environ & Soc Sustainable Dev VP (ESD)", "Off of Sr VP Dev Econ/Chief Econ (DECVP)", "Energy & Mining Sector Unit (E...
# $ repnb <chr> "21649", "21743", "WTP492", "21834", "E287", "27779", "21604", "E425", "21604", "22194", "21837", "22903", "E425", "E...

using tidyr unnest with NULL values

We can use map_lgl from purrr here. If you don't care about those rows with a NULL, you could simply remove them with filter and unnest:

library(tidyverse)

# Drop the rows whose list-column entry is NULL, then unnest `b`.
# `cols` is named explicitly: calling unnest() without it is deprecated
# since tidyr 1.0.0.
df %>%
  filter(!map_lgl(b, is.null)) %>%
  unnest(cols = b)
#> # A tibble: 3 x 3
#> a id value
#> <dbl> <fctr> <dbl>
#> 1 1 c 7
#> 2 1 d 8
#> 3 1 e 9

In case you want to keep those rows, you could bring them back with right_join after unnesting:

# Unnest the non-NULL rows, then right-join back onto the original keys so
# rows that had a NULL reappear filled with NA. `cols` is named explicitly:
# calling unnest() without it is deprecated since tidyr 1.0.0.
df %>%
  filter(!map_lgl(b, is.null)) %>%
  unnest(cols = b) %>%
  right_join(select(df, a))
#> Joining, by = "a"
#> # A tibble: 4 x 3
#> a id value
#> <dbl> <fctr> <dbl>
#> 1 1 c 7
#> 2 1 d 8
#> 3 1 e 9
#> 4 2 <NA> NA

Data

# Example inputs: one populated data frame, one NULL, and one all-NA data
# frame (`input3` is defined here but not used in `df`).
input1 <- data.frame(
  id = c("c", "d", "e"),
  value = c(7, 8, 9)
)
input2 <- NULL
input3 <- data.frame(id = NA, value = NA)

# Two-row tibble whose list-column `b` holds a data frame and a NULL.
df <- dplyr::tibble(a = c(1, 2), b = list(a = input1, c = input2))


Related Topics



Leave a reply



Submit