Simplified Dput() in R

Simplified dput() in R

Edit: leaving the older solution at the bottom because it got a bounty and many votes but proposing an improved answer

You can use the {constructive} package, now only on GitHub but might be on CRAN by the time you read this :

# remotes::install_github("cynkra/constructive")
Df <- data.frame(A = c(2, 2, 2, 6, 7, 8),
B = c("A", "G", "N", NA, "L", "L"),
C = c(1L, 3L, 5L, NA, NA, NA))

constructive::construct(Df)
#> data.frame(
#> A = c(2, 2, 2, 6, 7, 8),
#> B = c("A", "G", "N", NA, "L", "L"),
#> C = c(1L, 3L, 5L, NA, NA, NA)
#> )

It has custom constructors to many common classes so it should be able to reproduce most objects faithfully in a human readable way.


Old solution:

3 solutions :

  • a wrapper around dput (handles standard data.frames, tibbles and lists)

  • a read.table solution (for data.frames)

  • a tibble::tribble solution (for data.frames, returning a tibble)

All include n and random parameter which allow one to dput only the head of the data or sample it on the fly.

dput_small1(Df)
# Df <- data.frame(
# A = c(2, 2, 2, 6, 7, 8),
# B = structure(c(1L, 2L, 4L, NA, 3L, 3L), .Label = c("A", "G", "L",
# "N"), class = "factor"),
# C = c(1L, 3L, 5L, NA, NA, NA) ,
# stringsAsFactors=FALSE)

dput_small2(Df,stringsAsFactors=TRUE)
# Df <- read.table(sep="\t", text="
# A B C
# 2 A 1
# 2 G 3
# 2 N 5
# 6 NA NA
# 7 L NA
# 8 L NA", header=TRUE, stringsAsFactors=TRUE)

dput_small3(Df)
# Df <- tibble::tribble(
# ~A, ~B, ~C,
# 2, "A", 1L,
# 2, "G", 3L,
# 2, "N", 5L,
# 6, NA_character_, NA_integer_,
# 7, "L", NA_integer_,
# 8, "L", NA_integer_
# )
# Df$B <- factor(Df$B)

Wrapper around dput

This option that gives an output very close to the one proposed in the question. It's quite general because it's actually wrapped around dput, but applied separately on columns.

multiline means 'keep dput's default output laid out into multiple lines'.

dput_small1<- function(x,
name=as.character(substitute(x)),
multiline = TRUE,
n=if ('list' %in% class(x)) length(x) else nrow(x),
random=FALSE,
seed = 1){
name
if('tbl_df' %in% class(x)) create_fun <- "tibble::tibble" else
if('list' %in% class(x)) create_fun <- "list" else
if('data.table' %in% class(x)) create_fun <- "data.table::data.table" else
create_fun <- "data.frame"

if(random) {
set.seed(seed)
if(create_fun == "list") x <- x[sample(1:length(x),n)] else
x <- x[sample(1:nrow(x),n),]
} else {
x <- head(x,n)
}

line_sep <- if (multiline) "\n " else ""
cat(sep='',name," <- ",create_fun,"(\n ",
paste0(unlist(
Map(function(item,nm) paste0(nm,if(nm=="") "" else " = ",paste(capture.output(dput(item)),collapse=line_sep)),
x,if(is.null(names(x))) rep("",length(x)) else names(x))),
collapse=",\n "),
if(create_fun == "data.frame") ",\n stringsAsFactors = FALSE)" else "\n)")
}

dput_small1(list(1,2,c=3,d=4),"my_list",random=TRUE,n=3)
# my_list <- list(
# 2,
# d = 4,
# c = 3
# )

read.table solution

For data.frames I find it comfortable however to have the input in a more explicit/tabular format.

This can be reached using read.table, then reformatting automatically the type of columns that read.table wouldn't get right. Not as general as first solution but will work smoothly for 95% of the cases found on SO.

dput_small2 <- function(df,
name=as.character(substitute(df)),
sep='\t',
header=TRUE,
stringsAsFactors = FALSE,
n= nrow(df),
random=FALSE,
seed = 1){
name
if(random) {
set.seed(seed)
df <- df[sample(1:nrow(df),n),]
} else {
df <- head(df,n)
}
cat(sep='',name,' <- read.table(sep="',sub('\t','\\\\t',sep),'", text="\n ',
paste(colnames(df),collapse=sep))
df <- head(df,n)
apply(df,1,function(x) cat(sep='','\n ',paste(x,collapse=sep)))
cat(sep='','", header=',header,', stringsAsFactors=',stringsAsFactors,')')

sapply(names(df), function(x){
if(is.character(df[[x]]) & suppressWarnings(identical(as.character(as.numeric(df[[x]])),df[[x]]))){ # if it's a character column containing numbers
cat(sep='','\n',name,'$',x,' <- as.character(', name,'$',x,')')
} else if(is.factor(df[[x]]) & !stringsAsFactors) { # if it's a factor and conversion is not automated
cat(sep='','\n',name,'$',x,' <- factor(', name,'$',x,')')
} else if(inherits(df[[x]], "POSIXct")){
cat(sep='','\n',name,'$',x,' <- as.POSIXct(', name,'$',x,')')
} else if(inherits(df[[x]], "Date")){
cat(sep='','\n',name,'$',x,' <- as.Date(', name,'$',x,')')
}})
invisible(NULL)
}

Simplest case

dput_small2(iris,n=6)

will print:

iris <- read.table(sep="\t", text="
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
5.1 3.5 1.4 0.2 setosa
4.9 3.0 1.4 0.2 setosa
4.7 3.2 1.3 0.2 setosa
4.6 3.1 1.5 0.2 setosa
5.0 3.6 1.4 0.2 setosa
5.4 3.9 1.7 0.4 setosa", header=TRUE, stringsAsFactors=FALSE)

which in turn when executed will return :

#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1 5.1 3.5 1.4 0.2 setosa
# 2 4.9 3.0 1.4 0.2 setosa
# 3 4.7 3.2 1.3 0.2 setosa
# 4 4.6 3.1 1.5 0.2 setosa
# 5 5.0 3.6 1.4 0.2 setosa
# 6 5.4 3.9 1.7 0.4 setosa

str(iris)
# 'data.frame': 6 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4
# $ Species : chr " setosa" " setosa" " setosa" " setosa" ...

more complex

dummy data:

test <- data.frame(a=1:5,
b=as.character(6:10),
c=letters[1:5],
d=factor(letters[6:10]),
e=Sys.time()+(1:5),
stringsAsFactors = FALSE)

This:

dput_small2(test,'df2')

will print:

df2 <- read.table(sep="\t", text="
a b c d e
1 6 a f 2018-02-15 11:53:17
2 7 b g 2018-02-15 11:53:18
3 8 c h 2018-02-15 11:53:19
4 9 d i 2018-02-15 11:53:20
5 10 e j 2018-02-15 11:53:21", header=TRUE, stringsAsFactors=FALSE)
df2$b <- as.character(df2$b)
df2$d <- factor(df2$d)
df2$e <- as.POSIXct(df2$e)

which in turn when executed will return :

#   a  b c d                   e
# 1 1 6 a f 2018-02-15 11:53:17
# 2 2 7 b g 2018-02-15 11:53:18
# 3 3 8 c h 2018-02-15 11:53:19
# 4 4 9 d i 2018-02-15 11:53:20
# 5 5 10 e j 2018-02-15 11:53:21

str(df2)
# 'data.frame': 5 obs. of 5 variables:
# $ a: int 1 2 3 4 5
# $ b: chr "6" "7" "8" "9" ...
# $ c: chr "a" "b" "c" "d" ...
# $ d: Factor w/ 5 levels "f","g","h","i",..: 1 2 3 4 5
# $ e: POSIXct, format: "2018-02-15 11:53:17" "2018-02-15 11:53:18" "2018-02-15 11:53:19" "2018-02-15 11:53:20" ...

all.equal(df2,test)
# [1] "Component “e”: Mean absolute difference: 0.4574251" # only some rounding error

tribble solution

The read.table option is very readable but not very general. with tribble pretty much any data type can be handled (though factors need adhoc fixing).

This solution isn't so useful for OP's example but is great for list columns (see example below). To make use of the output, library tibble is required.

Just as my first solution, it's a wrapper around dput, but instead of 'dputting' columns, i'm 'dputting' elements.

dput_small3 <- function(df,
name=as.character(substitute(df)),
n= nrow(df),
random=FALSE,
seed = 1){
name
if(random) {
set.seed(seed)
df <- df[sample(1:nrow(df),n),]
} else {
df <- head(df,n)
}
df1 <- lapply(df,function(col) if(is.factor(col)) as.character(col) else col)
dputs <- sapply(df1,function(col){
col_dputs <- sapply(col,function(elt) paste(capture.output(dput(elt)),collapse=""))
max_char <- max(nchar(unlist(col_dputs)))
sapply(col_dputs,function(elt) paste(c(rep(" ",max_char-nchar(elt)),elt),collapse=""))
})
lines <- paste(apply(dputs,1,paste,collapse=", "),collapse=",\n ")
output <- paste0(name," <- tibble::tribble(\n ",
paste0("~",names(df),collapse=", "),
",\n ",lines,"\n)")
cat(output)
sapply(names(df), function(x) if(is.factor(df[[x]])) cat(sep='','\n',name,'$',x,' <- factor(', name,'$',x,')'))
invisible(NULL)
}

dput_small3(dplyr::starwars[c(1:3,11)],"sw",n=6,random=TRUE)
# sw <- tibble::tribble(
# ~name, ~height, ~mass, ~films,
# "Lando Calrissian", 177L, 79, c("Return of the Jedi", "The Empire Strikes Back"),
# "Finis Valorum", 170L, NA_real_, "The Phantom Menace",
# "Ki-Adi-Mundi", 198L, 82, c("Attack of the Clones", "The Phantom Menace", "Revenge of the Sith"),
# "Grievous", 216L, 159, "Revenge of the Sith",
# "Wedge Antilles", 170L, 77, c("Return of the Jedi", "The Empire Strikes Back", "A New Hope"),
# "Wat Tambor", 193L, 48, "Attack of the Clones"
# )

Example of using dput()

Using the iris dataset, which is handily included into R, we can see how dput() works:

data(iris)
head(iris)

Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa

Now we can get the whole dataset using dput(iris). In most situations, a whole dataset is unnecessary to provide for a Stackoverflow question, as a few lines of the relevant variables suffice as a working data example.

Two things come in handy: The head() function outputs only the first six rows of a dataframe/matrix. Also, the indexing in R (via brackets) allows you to select only specific columns.

Therefore, we can restrict the output of dput() using a combination of these two:

dput(head(iris[, c(1, 3)]))

structure(list(Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4),
Petal.Length = c(1.4, 1.4, 1.3, 1.5, 1.4, 1.7)), .Names = c("Sepal.Length",
"Petal.Length"), row.names = c(NA, 6L), class = "data.frame")

will give us the code to reproduce the first (up to) six rows of column 1 and 3 of the iris dataset.

df <- structure(list(Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4), 
Petal.Length = c(1.4, 1.4, 1.3, 1.5, 1.4, 1.7)), .Names = c("Sepal.Length",
"Petal.Length"), row.names = c(NA, 6L), class = "data.frame")

> df
Sepal.Length Petal.Length
1 5.1 1.4
2 4.9 1.4
3 4.7 1.3
4 4.6 1.5
5 5.0 1.4
6 5.4 1.7

If the first rows do not suffice, we can skip using head() and rely on indexing only:

dput(iris[1:20, c(1, 3)])

structure(list(Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6,
5, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1
), Petal.Length = c(1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4,
1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5)), .Names = c("Sepal.Length",
"Petal.Length"), row.names = c(NA, 20L), class = "data.frame")

will give us the the first twenty rows:

df <- structure(list(Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6, 
5, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1
), Petal.Length = c(1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4,
1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5)), .Names = c("Sepal.Length",
"Petal.Length"), row.names = c(NA, 20L), class = "data.frame")

> df
Sepal.Length Petal.Length
1 5.1 1.4
2 4.9 1.4
3 4.7 1.3
4 4.6 1.5
5 5.0 1.4
6 5.4 1.7
7 4.6 1.4
8 5.0 1.5
9 4.4 1.4
10 4.9 1.5
11 5.4 1.5
12 4.8 1.6
13 4.8 1.4
14 4.3 1.1
15 5.8 1.2
16 5.7 1.5
17 5.4 1.3
18 5.1 1.4
19 5.7 1.7
20 5.1 1.5

Using dput() with large sample size in R

You can always write the result of dput to a file:

dput(rnorm(20), 'test.txt')

See ?dput for details.

Getting a command to re-create an object that is simpler than with dput()

Here is a partial implementation for data.frames (no row.names or other attributes):

dput2 <- function(x, ...) UseMethod("dput2")
dput2.data.frame <- function(x, ...) {
fun <- function(nm) paste(nm, "=",
paste(capture.output(dput(x[[nm]], file = stdout())), collapse = ""))
L <- lapply(names(x), fun)
cat(paste("data.frame(", paste(unlist(L), collapse = ",\n"), ")"), "\n")
}

For example,

> dput2(BOD)
data.frame( Time = c(1, 2, 3, 4, 5, 7),
demand = c(8.3, 10.3, 19, 16, 15.6, 19.8) )

> dput2(anscombe)
data.frame( x1 = c(10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5),
x2 = c(10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5),
x3 = c(10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5),
x4 = c(8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8),
y1 = c(8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68),
y2 = c(9.14, 8.14, 8.74, 8.77, 9.26, 8.1, 6.13, 3.1, 9.13, 7.26, 4.74),
y3 = c(7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73),
y4 = c(6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89) )

R: Understanding dput() output

There are two main types "numeric" atomic vectors to think of in R. The first is "integers", or counting numbers, think of these as numbers like you would use for counting etc. -2, -1, 0, 1, 2, 3... The other is the "double" type or real numbers which is numbers that can be anywhere along an infinitely long number line, e.g. -8.43, -2.10, 0.001, 18.2797615.

Following the number with an L simply tells R they are integers not real numbers. In your dput it is simply that the column is full of 1s and 0s and that these are integers.

How to put the output of `dput` in one line in R?

Take dat <- head(iris) for example:

  1. Make one-line output for dput() displayed in the console:
cat(capture.output(dput(dat)), "\n", sep = "")

Output:

structure(list(Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4),     Sepal.Width = c(3.5, 3, 3.2, 3.1, 3.6, 3.9), Petal.Length = c(1.4,     1.4, 1.3, 1.5, 1.4, 1.7), Petal.Width = c(0.2, 0.2, 0.2,     0.2, 0.2, 0.4), Species = structure(c(1L, 1L, 1L, 1L, 1L,     1L), levels = c("setosa", "versicolor", "virginica"), class = "factor")), row.names = c(NA, 6L), class = "data.frame")

  1. Copy to the clipboard (Windows only):
writeClipboard(paste(capture.output(dput(dat)), collapse = ""))

Where does dput write when diverting output using sink?

dput is actually writing the output where expected, but it's not writing it when expected.
Running the following code shows that dput output remains pending until the next normal output:

sink(file = 'test.txt', split = TRUE)
x <- 2^(1:4)
x
dput(2*x,file="")
3*x

...gives a test.txt with:

[1]  2  4  8 16
c(4, 8, 16, 32)
[1] 6 12 24 48

Alternatively, running the sink() function to close the file will also force the pending output (but will close the connection).

sink(file = 'test.txt', split = TRUE)
x <- 2^(1:4)
x
dput(2*x,file="")
sink()

Loading a dput into R using a script

You are over-complicating things -- see the help and example for dput().

Its output is meant to be copied over to a new assignment. This can be this simple:

R> ## let us create an object x
R> x <- c(a=1.23, b=42.3)
R> x
a b
1.23 42.30
R> dput(x)
structure(c(1.23, 42.3), .Names = c("a", "b"))
R>
R> ## now use the dput() output in an assignment
R> y <- structure(c(1.23, 42.3), .Names = c("a", "b"))
R>
R> ## and lo and behold:
R> identical(x, y)
[1] TRUE
R>

How to load the output of dput() into an object?

Use the assignment operator <-:

    my.object.df <- structure(list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 
24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4,
30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8,
19.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8,
8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4),
disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8,
167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7,
71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145,
301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95,
123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150,
150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9,
3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,
3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76,
3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
), wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19,
3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2,
1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14,
1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 17.02, 18.61,
19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6,
18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87,
17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3,
3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3,
3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4,
2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1,
2, 2, 4, 6, 8, 2)), .Names = c("mpg", "cyl", "disp", "hp",
"drat", "wt", "qsec", "vs", "am", "gear", "carb"), row.names = c("Mazda RX4",
"Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout",
"Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280",
"Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
"Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
"Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
"Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
"Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
"Volvo 142E"), class = "data.frame")

You could also make a text file and then use source to read and execute it. (It would, of course, be faster to load it from an '.Rdata' file created with save.)

This is a small example.

> test <- c(1.1, 2,3, 4)
> dput(test)
c(1.1, 2, 3, 4)
> dput( test, "mytest.txt")
> dget("mytest.txt")
[1] 1.1 2.0 3.0 4.0
> newtest <- dget("mytest.txt")
> newtest
[1] 1.1 2.0 3.0 4.0

Notice that the object needs to be assigned at the time of input with dget. If on the other hand you save the first line of the example (with an assignment to a name in the text) above as a text file, you can bring if back in with eval(parse(...))

> dget("secondtest.txt")
> test
Error: object 'test' not found
> ?load # Nope, that only works for binary files
> eval( parse(file= "secondtest.txt"))
> test
[1] 1.1 2.0 3.0 4.0


Related Topics



Leave a reply



Submit