Loop with a Defined Ggplot Function Over Multiple Dataframes

Loop with a defined ggplot function over multiple dataframes

You can create multiple ggplots in a loop with the predefined function myplot() as follows:

# One character vector is enough here: the plot titles are the same as the
# names of the data frames. It is called `df_names` rather than `list` so that
# base::list() is not masked.
df_names <- c("df1", "df2", "df3")

#' Scatter plot of `y` against `x` with a title
#'
#' @param data A data frame with columns `x` and `y`.
#' @param title Plot title, passed to `labs()`.
#' @return A ggplot object.
myplot <- function(data, title) {
  ggplot(data, aes(x = x, y = y)) +
    geom_point(color = "grey") +
    labs(title = title)
}

# ggplot objects are not auto-printed inside a for loop, hence the print().
for (df_name in df_names) {
  print(myplot(get(df_name), df_name))
}

If you want to work with two vectors, one giving the names of the data frames and the other giving the titles, you can do the following:

# Names of the data frames and the matching plot titles, paired by position.
# `df_names` is used instead of `list` so that base::list() is not masked.
df_names <- c("df1", "df2", "df3")
titles <- c("Title 1", "Plot 2", "gg3")
# Both vectors are indexed in parallel below, so they must be equally long.
stopifnot(length(df_names) == length(titles))

#' Scatter plot of `y` against `x` with a title
#'
#' @param data A data frame with columns `x` and `y`.
#' @param title Plot title, passed to `labs()`.
#' @return A ggplot object.
myplot <- function(data, title) {
  ggplot(data, aes(x = x, y = y)) +
    geom_point(color = "grey") +
    labs(title = title)
}

# seq_along(titles) would work just as well, as both have the same length.
for (i in seq_along(df_names)) {
  print(myplot(get(df_names[i]), titles[i]))
}

Ggplot over multiple dataframes with loops

Your problem wasn't in the iteration over the list of dataframes, it was in the use of b1 within the annotate(). Here, I've created a new dataframe within each loop, and called the column name specifically. There is probably a nicer way of doing this, though. Also, the ggsave() needed to call the names of the items in the list, specifically.

library(tidyverse)

# Create data frames (in this example n = 3)
df_1 <- data.frame(a1 = 1:1000, b1 = 1:1000)
df_2 <- data.frame(a1 = 1:1000, b1 = 1:1000)
df_3 <- data.frame(a1 = 1:1000, b1 = 1:1000)

# Store the data frames in one list, looked up by their names. mget() returns
# a list that is already named after the objects, so neither
# eval(parse(text = ...)) nor a separate names<- step is needed.
example.list <- mget(paste0("df_", 1:3), envir = environment())

#Graph and save for each dataframe

# seq_along() instead of 1:length(): the latter iterates over c(1, 0) when the
# list is empty.
for (i in seq_along(example.list)) {
  df_i <- example.list[[i]]
  # Computed once and reused for the reference line and its text label.
  mean_b1 <- mean(df_i$b1)

  benp <- ggplot(df_i, aes(x = b1)) +
    # after_stat() replaces the deprecated ..density.. / ..count.. notation.
    geom_histogram(
      aes(y = after_stat(density), alpha = after_stat(count)),
      fill = "steelblue", bins = 60
    ) +
    labs(title = "Beneficios", subtitle = "") +
    ylab("Densidad") +
    xlab("Beneficios ($millones)") +
    geom_vline(xintercept = mean_b1, color = "red4", linetype = "dashed") +
    theme(legend.position = "none") +
    annotate(
      "text",
      x = mean_b1, y = 0, label = round(mean_b1, digits = 2),
      colour = "red4", size = 3.5, vjust = -1.5, hjust = -0.5
    )

  # Name the arguments explicitly rather than relying on `file` partially
  # matching `filename` and the plot falling into the next free position.
  ggsave(
    filename = paste0(names(example.list)[i], "_histogram.png"),
    plot = benp
  )
}

How do you use loop in R with ggpie function and save a filename after the dataframe?

This works for me

library(tibble)
library(dplyr)
library(ggpubr)

# Three example samples. Each tibble has one row per potential source, with
# that sample's relative contribution for the source.
DZmix_SC1 <- tibble(
  Sample_ID = rep("SC1_18", times = 3),
  Potential_Sources = c("Uintas", "Sierra Madre", "CMB"),
  Relative_Contribution = c(0, 22, 78),
  Metric = rep("KV", times = 3)
)

DZmix_5_SC <- tibble(
  Sample_ID = rep("5-SC_18", times = 3),
  Potential_Sources = c("Uintas", "Sierra Madre", "CMB"),
  Relative_Contribution = c(0, 29, 71),
  Metric = rep("KV", times = 3)
)

DZmix_PL3 <- tibble(
  Sample_ID = rep("PL3_18", times = 3),
  Potential_Sources = c("Uintas", "Sierra Madre", "CMB"),
  Relative_Contribution = c(69, 0, 31),
  Metric = rep("KV", times = 3)
)

# Names of the tibbles above, so they can be fetched with get() in a loop.
Sample_list <- c("DZmix_SC1", "DZmix_5_SC", "DZmix_PL3")

#' Pie chart of relative contributions for one sample
#'
#' Rounds `Relative_Contribution` to one decimal place, sorts the rows by
#' descending `Potential_Sources`, and draws a `ggpie()` chart whose slices
#' are filled by `Potential_Sources` and labelled with the rounded values.
#'
#' @param df Data frame with columns `Relative_Contribution` and
#'   `Potential_Sources`.
#' @param title Not used by the function body.
#' @return The `ggpie()` plot with the legend hidden and transparent
#'   panel/plot backgrounds.
DZpie.fn <- function(df, title) {
  rounded <- mutate(
    df,
    Relative_Contribution = round(Relative_Contribution, 1)
  )
  ordered <- arrange(rounded, desc(Potential_Sources))

  pie <- ggpie(
    ordered, "Relative_Contribution",
    label = "Relative_Contribution",
    fill = "Potential_Sources",
    color = "white",
    size = 1.5,
    palette = c("#636363", "#cccccc", "#969696"),
    lab.pos = c("in"),
    lab.font = c(0, "bold", "black")
  )

  pie +
    theme(
      legend.position = "none",
      panel.background = element_rect(fill = "transparent"),
      plot.background = element_rect(fill = "transparent", color = NA)
    )
}

# Draw one pie chart per sample; the data frame is looked up by its name.
for (sample_name in Sample_list) {
  sample_df <- get(sample_name)
  print(DZpie.fn(sample_df, sample_name))
}

Your method is actually correct. You just misplaced a + before lab.pos = c("in").

Then you can save images using

# Make sure the output directory exists before saving into it; the original
# assumed "temp/" was already there.
dir.create("temp", showWarnings = FALSE)
for (i in Sample_list) {
  ggsave(
    filename = file.path("temp", paste0(i, ".png")),
    plot = DZpie.fn(get(i), i)
  )
}

Or equivalently but without for loop

# Make sure the output directory exists before saving into it; the original
# assumed "temp/" was already there.
dir.create("temp", showWarnings = FALSE)
purrr::walk(Sample_list, function(name) {
  ggsave(
    filename = file.path("temp", paste0(name, ".png")),
    plot = DZpie.fn(get(name), name)
  )
})

loop over dataframes in ggplot2

The usual way to iterate over data.frames (which are just regularly organized lists) is with lapply:

 # as.Date() on a number needs an explicit origin in R versions before 4.3.
df1 <- data.frame(
  date = as.Date(10 * 365 * rbeta(100, .5, .1), origin = "1970-01-01"),
  group = "a"
)
df2 <- data.frame(
  date = as.Date(10 * 365 * rbeta(50, .1, .5), origin = "1970-01-01"),
  group = "b"
)
df3 <- data.frame(
  date = as.Date(10 * 365 * rbeta(25, 3, 3), origin = "1970-01-01"),
  group = "c"
)
dfrmL <- list(df1, df2, df3)

lapply(dfrmL, NROW)
[[1]]
[1] 100

[[2]]
[1] 50

[[3]]
[1] 25

In the case of producing a list of ggplot-objects I would imagine that the Hadley-method would instead be to use llply, but I'm not a skilled plyr-user, so let me suggest this totally untested code template:

# Template only: V1, V2 and V3 stand in for the columns you want to plot
# (the example data frames above have `date` and `group` instead).
# The original had a doubled, unbalanced `qplot(qplot(` call; ggplot() with
# geom_point() replaces the deprecated qplot().
plts <- lapply(dfrmL, function(df) {
  ggplot(df, aes(x = V1, y = reorder(V2, V3), color = V3)) +
    geom_point() +
    coord_flip()
})
# Plots created inside lapply() are not drawn automatically, so print each one
# explicitly as stated in the R-FAQ. invisible() stops the returned list from
# being auto-printed, which would draw every plot a second time.
invisible(lapply(plts, print))

Create ggplot multiple graphs dynamically with loop in R

You can create a function that takes the y variable you want to plot. It then becomes relatively simple to write a for loop that cycles through multiple y variables, or you could use a function from purrr. I have now included the loop, and I had to go down the route of quoted variable names, as @stefan explained.

library(ggplot2)
library(purrr)

#' Line plot of one measure over time, one line per `Type`
#'
#' @param y_var Name (a string) of the column to draw on the y axis. The same
#'   column supplies the rounded text labels, and the string is the plot title.
#' @param data Data frame with columns `Date`, `Type` and the column named by
#'   `y_var`. Defaults to the global `Actual_Forecast_Monthly_France`, which is
#'   what the function read before this argument existed.
#' @return A ggplot object.
gg_fun <- function(y_var, data = Actual_Forecast_Monthly_France) {
  y_sym <- sym(y_var)

  # `show.legend` is a layer argument, not an aesthetic, so it has been removed
  # from aes(); there it only created an unused aesthetic.
  ggplot(
    data = data,
    aes(x = Date, y = !!y_sym, group = Type, linetype = factor(Type))
  ) +
    geom_line(aes(color = Type)) +
    geom_point(size = 0.5) +
    # Label with the plotted column. The original always labelled with
    # `Visits`, which put the wrong numbers on the "Sales" plot.
    geom_text(aes(label = round(!!y_sym)), size = 3) +
    theme(axis.text.x = element_text(angle = 90)) +
    labs(title = y_var) +
    theme(plot.title = element_text(hjust = 0.5)) +
    scale_color_manual(
      values = c("#03a623", "#030063", "#ffaad7", "#b6b6b6")
    )
}

gg <- map(c("Visits", "Sales"), gg_fun)

gg[[1]]

Sample Image

gg[[2]]

Sample Image

Created on 2022-09-13 with reprex v2.0.2

data

# Example data: the same three dates for the "Actual" and the "Forecast"
# series, with a Visits and a Sales value for each row.
Date <- rep(as.Date(c("2022-01-01", "2022-01-02", "2022-01-03")), times = 2)
Type <- rep(c("Actual", "Forecast"), each = 3)
Visits <- c(67398, 63398, 61398, 53422, 72726, 92822)
Sales <- c(17398, 23398, 41398, 12422, 33726, 53822)
Actual_Forecast_Monthly_France <- data.frame(
  Date = Date,
  Type = Type,
  Visits = Visits,
  Sales = Sales
)

Created on 2022-09-13 with reprex v2.0.2

How to index a dataframe for using ggplot in a loop

(Up front: the reason that your plots are all identical is due to ggplot's "lazy" evaluation of code. See my #2 below, where I identify that the data[,i] is evaluated when you try to plot the data, at which point i is 4, the last pass in the for loop.)

  1. It's generally preferred/recommended to use data.frames instead of matrices or vectors (as you're doing here). It gives a bit more power and control.

    data <- data.frame(a,b,c,d,time)
  2. Also, I tend to prefer lapply to for-loops and lists, for various (some subjective) reasons. Ultimately, the issue you're having is that ggplot2 is evaluating the data lazily, so plots is a list with four plots that make reference to i ... and that is realized when you try to plot them all, at which point i is 4 (from the last pass through the loop). One benefit of using lapply is that the i referenced is a local-only (inside of the anon-func) version of i that is preserved as you would expect.

    # One line plot per value column (the first four columns of `data`), each
    # against `time`. `nm` is local to the function, so every plot keeps its
    # own column name.
    plots <- lapply(
      names(data)[1:4],
      \(nm) ggplot(data, aes(x = time, y = .data[[nm]])) + geom_line()
    )
    gridExtra::grid.arrange(plots[[1]], plots[[2]])

    grid.arrange on two ggplots

  3. I also prefer patchwork to gridExtra, mostly because it makes more-customized layouts a bit more intuitive, plus adds functionality such as axis-alignment, shared legends, shared titles, etc. (None of those other features are demonstrated here.)

    library(patchwork)
    plots[[1]] / plots[[2]] # same plot
    plots[[1]] + plots[[2]] # side-by-side instead of top/bottom
    (plots[[1]] + plots[[2]]) / (plots[[3]] + plots[[4]]) # grid
  4. Ultimately, though, I suggest that facets can be useful and very powerful. For this, we need to melt/pivot the data into a "long format" so that the column names a-d are actually values in one column.

    # Melt to long format (columns time, variable, value), then draw one
    # facet row per original column, each with its own y scale.
    ggplot(
      reshape2::melt(data, id.vars = "time"),
      aes(x = time, y = value)
    ) +
      geom_line() +
      facet_grid(variable ~ ., scales = "free_y")

    ggplot2 with facets

    I assumed the preference for independent (free) y-scales, ergo the scales="free_y". Try it without if you want to see the options. (There are also scales="free_x" and scales="free" (both).)

    To see what I mean by "long" format:

    reshape2::melt(data, id.vars = "time")
    # time variable value
    # 1 1 a 1
    # 2 2 a 2
    # 3 3 a 3
    # 4 4 a 4
    # 5 1 b 5
    # 6 2 b 6
    # 7 3 b 7
    # 8 4 b 8
    # 9 1 c 9
    # 10 2 c 10
    # 11 3 c 11
    # 12 4 c 12
    # 13 1 d 13
    # 14 2 d 14
    # 15 3 d 15
    # 16 4 d 16

    This can also be done with tidyr::pivot_longer(data, -time), although the variable column is then called name. For this use, neither reshape2::melt nor tidyr::pivot_longer has an advantage over the other; the latter supports significantly more complex pivoting, which is not relevant with this data.


Data

# Example data: four value columns (a to d) plus a `time` column, four rows.
data <- data.frame(
  a = c(1, 2, 3, 4),
  b = c(5, 6, 7, 8),
  c = c(9, 10, 11, 12),
  d = c(13, 14, 15, 16),
  time = c(1, 2, 3, 4)
)

ggplot2: output multiple plots by looping

Following this answer:

library(ggplot2)
point1 <- c(1, 2)
point2 <- c(2, 2)

# rbind() of the unnamed vectors gives a two-column matrix, so the data frame
# columns are called V1 and V2.
points <- as.data.frame(rbind(point1, point2))

# One slot per row (one plot per point). The original sized the list with
# ncol() and then immediately replaced it with an empty list().
point_plots_list <- vector("list", nrow(points))
for (i in seq_len(nrow(points))) {
  message(i)
  # Row i is passed as the plot data, so it is evaluated right here. Mapping
  # aes() to points[i, 1] instead would be evaluated lazily when the plot is
  # drawn, which is why the original needed local() to freeze `i`.
  point_plots_list[[i]] <- ggplot(points[i, ], aes(x = V1, y = V2)) +
    geom_point() +
    xlim(-3, 3) +
    ylim(-3, 3) +
    theme_bw()
}
#> 1
#> 2
point_plots_list
#> [[1]]

Sample Image

#> 
#> [[2]]

Sample Image

Using lapply instead of a for loop and displaying plots with ggarrange or multiplot:

library(ggplot2)
library(ggpubr)
point1 <- c(1, 2)
point2 <- c(2, 2)

points <- as.data.frame(rbind(point1, point2))

#' Scatter plot of the first two columns of a data frame
#'
#' @param data A data frame (or tibble) with at least two columns; the first
#'   is drawn on the x axis and the second on the y axis.
#' @return A ggplot object with both axes limited to [-3, 3].
plot_data <- function(data) {
  cols <- names(data)
  # Look the columns up through the .data pronoun rather than data[, 1]:
  # the latter returns a one-column tibble (not a vector) for tibble input
  # and labels the axes "data[, 1]" / "data[, 2]".
  ggplot(data, aes(x = .data[[cols[1]]], y = .data[[cols[2]]])) +
    geom_point() +
    labs(x = cols[1], y = cols[2]) +
    xlim(-3, 3) +
    ylim(-3, 3) +
    theme_bw()
}
# One plot per row of `points`. lapply() builds the list itself, so the
# previous `myplots <- list()` initialisation was redundant; seq_len()
# replaces 1:nrow().
myplots <- lapply(seq_len(nrow(points)), function(i) plot_data(points[i, ]))

ggarrange(
  myplots[[1]], myplots[[2]],
  labels = c("A", "B"),
  ncol = 2, nrow = 1
)

Sample Image

# NOTE(review): source() on a URL downloads and runs whatever R code the
# server returns, here over plain http. Consider saving a reviewed copy of
# multiplot.R locally and sourcing that instead.
source("http://peterhaschke.com/Code/multiplot.R") #load multiplot function
# Draw all plots in `myplots` using two columns.
multiplot(plotlist = myplots, cols = 2)
#> Loading required package: grid

Sample Image

Created on 2021-04-09 by the reprex package (v0.3.0)

Looping through a list of dataframes and saving individual graphs for each

What if you facet_wrap?

Creating df from your data...

ID <- c(1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 
1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027,
1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027,
1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027,
1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027)
Week = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64)
MDD_PSR = c(NA, NA, NA, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)
ADHD_PSR = c(NA, NA, NA, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA)
GAD_PSR = c(NA, NA, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)

b <- data.frame(ID,Week, MDD_PSR,ADHD_PSR,GAD_PSR)

ID <- c(1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038,
1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038,
1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038,
1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038,
1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038,
1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038, 1038,
1038, 1038, 1038, 1038, 1038)
Week <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64)
MDD_PSR <- c(NA,2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 5, 5, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA)
ADHD_PSR <- c(NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA)
GAD_PSR <- c(NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)
c <- data.frame(ID,Week, MDD_PSR,ADHD_PSR,GAD_PSR)

f <- rbind.data.frame(b,c)

Getting the graphs to show up next to each other

# Reshape to long format: every column except Week and ID becomes a
# Module/PSR pair. pivot_longer() replaces the superseded gather(), and the
# group_by(ID) was dropped because the reshape does not need grouping and the
# result was never ungrouped.
c_plot <- f %>%
  pivot_longer(
    cols = -c(Week, ID),
    names_to = "Module",
    values_to = "PSR"
  )

# One panel per ID, side by side.
ggplot(data = c_plot, aes(x = Week, y = PSR)) +
  geom_line(aes(color = Module, linetype = Module)) +
  scale_color_manual(values = c("yellow4", "blue", "darkred")) +
  scale_linetype_manual(values = c("solid", "twodash", "solid")) +
  ylim(1, 6) +
  facet_wrap(. ~ ID)

Alternatively...This gets the error out of your code

# Reshape to long format: every column except Week and ID becomes a
# Module/PSR pair. pivot_longer() replaces the superseded gather(), and the
# group_by(ID) was dropped because split() below does the per-ID separation.
c_plot <- f %>%
  pivot_longer(
    cols = -c(Week, ID),
    names_to = "Module",
    values_to = "PSR"
  )

# One long-format data frame per ID, named by the ID value.
ALIFE_transpose_split <- split(c_plot, c_plot$ID)

#' Plot PSR over Week for one piece of the split data and save it as a PDF
#'
#' @param x Long-format data frame with columns `ID`, `Week`, `PSR` and
#'   `Module`, e.g. one element of `ALIFE_transpose_split`.
#' @return The value returned by `ggsave()`. The function is called for its
#'   side effect of writing `figure_<ID>.pdf` into the current directory.
plotdata <- function(x) {
  p <- ggplot(data = x, aes(x = Week, y = PSR)) +
    geom_line(aes(color = Module, linetype = Module)) +
    scale_color_manual(values = c("yellow4", "blue", "darkred")) +
    scale_linetype_manual(values = c("solid", "twodash", "solid")) +
    ylim(1, 6)

  # Build the file name from the ID(s) present in `x`. The original used the
  # fixed name "figure%03d.pdf", which does not vary between calls, so each
  # data frame's plot replaced the previous one on disk.
  ids <- paste(unique(x$ID), collapse = "_")

  ggsave(
    filename = paste0("figure_", ids, ".pdf"),
    # Pass the plot explicitly instead of relying on last_plot().
    plot = p,
    device = "pdf",
    path = "./",
    scale = 1,
    width = 13,
    height = 8,
    dpi = 300,
    units = c("in"),
    limitsize = TRUE
  )
}


Related Topics



Leave a reply



Submit