Simpler Population Pyramid in Ggplot2

Simpler population pyramid in ggplot2

Here is a solution without the faceting. First, create data frame. I used values from 1 to 20 to ensure that none of values is negative (with population pyramids you don't get negative counts/ages).

test <- data.frame(v=sample(1:20,1000,replace=T), g=c('M','F'))

Then combined two geom_bar() calls separately for each of g values. For F counts are calculated as they are but for M counts are multiplied by -1 to get bar in opposite direction. Then scale_y_continuous() is used to get pretty values for axis.

require(ggplot2)
require(plyr)
ggplot(data=test,aes(x=as.factor(v),fill=g)) +
geom_bar(subset=.(g=="F")) +
geom_bar(subset=.(g=="M"),aes(y=..count..*(-1))) +
scale_y_continuous(breaks=seq(-40,40,10),labels=abs(seq(-40,40,10))) +
coord_flip()

UPDATE

As argument subset=. is deprecated in the latest ggplot2 versions the same result can be atchieved with function subset().

ggplot(data=test,aes(x=as.factor(v),fill=g)) + 
geom_bar(data=subset(test,g=="F")) +
geom_bar(data=subset(test,g=="M"),aes(y=..count..*(-1))) +
scale_y_continuous(breaks=seq(-40,40,10),labels=abs(seq(-40,40,10))) +
coord_flip()

Sample Image

Population pyramid plot with ggplot2 and dplyr (instead of plyr)

You avoid the error by specifying the argument data in geom_bar:

ggplot(data = test, aes(x = as.factor(v), fill = g)) + 
geom_bar(data = dplyr::filter(test, g == "F")) +
geom_bar(data = dplyr::filter(test, g == "M"), aes(y = ..count.. * (-1))) +
scale_y_continuous(breaks = seq(-40, 40, 10), labels = abs(seq(-40, 40, 10))) +
coord_flip()

how to make population pyramid with individual data that shows all age levels even if there is no data for that level in R?

To get all ages in the plot, (1) add all of the levels to the Age factor that you want included in the plot, and (2) add drop=FALSE to scale_x_discrete. To get a symmetric y axis, add the y-range you desire to coord_flip().

The example below has ages in 10-year groupings (except for age less than 1), created using the cut function. The labels in scale_x_discrete are set to correspond to the groupings in cut.

ggplot(data=test,aes(x=cut(Age, breaks=c(-1,seq(0,100,10))), fill=Sex)) + 
geom_bar(data=subset(test, Sex=="Female")) +
geom_bar(data=subset(test, Sex=="Male"), aes(y=..count..*(-1)),
position="identity") +
scale_y_continuous(breaks=seq(-50,50,10),labels=abs(seq(-50,50,10))) +
scale_x_discrete(labels=c("< 1",paste0(seq(1,91,10),"-",seq(10,100,10))), drop=FALSE) +
xlab("Age (years)") + ylab("Count") +
coord_flip(ylim=c(-20,20))

Sample Image

If you want to show every single age value as a separate bar, rather than group them in multi-year increments, you can do the following:

ggplot(data=test,aes(x=factor(round(Age), levels=seq(0,100,1)), fill=Sex)) + 
geom_bar(data=subset(test, Sex=="Female")) +
geom_bar(data=subset(test, Sex=="Male"), aes(y=..count..*(-1)),
position="identity") +
scale_y_continuous(breaks=seq(-50,50,10),labels=abs(seq(-50,50,10))) +
scale_x_discrete(breaks = seq(0,90,10), drop=FALSE) +
xlab("Age (years)") + ylab("Count") +
coord_flip(ylim=c(-20,20))

R ggplot2 create a pyramid plot

You could make one gender negative to create a pyramid plot and use two geom_bar, one per gender like this:

library(tidyverse)
library(janitor)
library(lemon)
pop = structure(list(age_group = c("< 5 years", "5 - 9", "10 - 14",
"15 - 19", "20 - 24", "25 - 29", "30 - 34", "35 - 44",
"45 - 54", "55 - 64", "65 - 74", "75 - 84", "85 +"),
males = c(6, 6, 7, 6, 7, 7, 8, 17, 15, 11, 6, 3, 1), females = c(6,
5, 6, 6, 6, 7, 7, 16, 15, 12, 7, 4, 2)), row.names = c(NA,
-13L), spec = structure(list(cols = list(`AGE GROUP` = structure(list(), class = c("collector_character",
"collector")), MALES = structure(list(), class = c("collector_double",
"collector")), FEMALES = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))

# Draw a pyramid plot

pop_df = pop %>%
dplyr::select(age_group,males,females) %>%
gather(key = Type, value = Value, -c(age_group))

# Make male values negative
pop_df$Value <- ifelse(pop_df$Type == "males", -1*pop_df$Value, pop_df$Value)

ggplot(pop_df, aes(x = age_group, y = Value, fill = Type)) +
geom_bar(data = subset(pop_df, Type == "females"), stat = "identity") +
geom_bar(data = subset(pop_df, Type == "males"), stat = "identity") +
scale_y_continuous(labels = abs) +
labs(x = "Age group", y = "Value", fill = "Gender") +
coord_flip()

Sample Image

Created on 2022-07-27 by the reprex package (v2.0.1)

Bad result when trying to create a population pyramid in R

I've tried to replicate your data and make a pyramid plot that might be of use to you.

First, some pretend data that I think is similar to yours:

set.seed(1234)
alter <- rep(1:75, each=2)
Geschlecht <- rep(strrep(c("männlich", "weiblich"), 1), 75)
v <- sample(1:20, 150, replace=T) # these are the values to make the pyramid
df <- data.frame(alter = alter, Geschlecht = Geschlecht, v=v)
rm(alter, Geschlecht, v) # remove the vectors to stop ggplot getting confused

UPDATE: Plot code below changed to provide counts in order of age:

Then a pyramid plot, using the method in the question you linked to:

library(ggplot2)
ggplot(data=df, aes(x=alter, fill=Geschlecht)) +
geom_bar(stat="identity", data=subset(df,Geschlecht=="weiblich"), aes(y=v)) +
geom_bar(stat="identity", data=subset(df,Geschlecht=="männlich"),aes(y=v*-1)) +
scale_y_continuous(breaks=seq(-40,40,10),labels=abs(seq(-40,40,10))) +
labs(y = "Anzahl", x = "Alter") +
coord_flip()

pyramid_plot_v3

You can also do it using your original style of code (produces same plot as above but in fewer lines):

ggplot(data = df, 
mapping = aes(x = alter, fill = Geschlecht,
y = ifelse(test = Geschlecht == "männlich",
yes = -v, no = v))) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = abs, limits = max(df$v) * c(-1,1)) +
labs(y = "Anzahl", x = "Alter") +
coord_flip()

drawing pyramid plot using R and ggplot2

This is essentially a back-to-back barplot, something like the ones generated using ggplot2 in the excellent learnr blog: http://learnr.wordpress.com/2009/09/24/ggplot2-back-to-back-bar-charts/

You can use coord_flip with one of those plots, but I'm not sure how you get it to share the y-axis labels between the two plots like what you have above. The code below should get you close enough to the original:

First create a sample data frame of data, convert the Age column to a factor with the required break-points:

require(ggplot2)
df <- data.frame(Type = sample(c('Male', 'Female', 'Female'), 1000, replace=TRUE),
Age = sample(18:60, 1000, replace=TRUE))

AgesFactor <- ordered( cut(df$Age, breaks = c(18,seq(20,60,5)),
include.lowest = TRUE))

df$Age <- AgesFactor

Now start building the plot: create the male and female plots with the corresponding subset of the data, suppressing legends, etc.

gg <- ggplot(data = df, aes(x=Age))

gg.male <- gg +
geom_bar( subset = .(Type == 'Male'),
aes( y = ..count../sum(..count..), fill = Age)) +
scale_y_continuous('', formatter = 'percent') +
opts(legend.position = 'none') +
opts(axis.text.y = theme_blank(), axis.title.y = theme_blank()) +
opts(title = 'Male', plot.title = theme_text( size = 10) ) +
coord_flip()

For the female plot, reverse the 'Percent' axis using trans = "reverse"...

gg.female <- gg + 
geom_bar( subset = .(Type == 'Female'),
aes( y = ..count../sum(..count..), fill = Age)) +
scale_y_continuous('', formatter = 'percent', trans = 'reverse') +
opts(legend.position = 'none') +
opts(axis.text.y = theme_blank(),
axis.title.y = theme_blank(),
title = 'Female') +
opts( plot.title = theme_text( size = 10) ) +
coord_flip()

Now create a plot just to display the age-brackets using geom_text, but also use a dummy geom_bar to ensure that the scaling of the "age" axis in this plot is identical to those in the male and female plots:

gg.ages <- gg + 
geom_bar( subset = .(Type == 'Male'), aes( y = 0, fill = alpha('white',0))) +
geom_text( aes( y = 0, label = as.character(Age)), size = 3) +
coord_flip() +
opts(title = 'Ages',
legend.position = 'none' ,
axis.text.y = theme_blank(),
axis.title.y = theme_blank(),
axis.text.x = theme_blank(),
axis.ticks = theme_blank(),
plot.title = theme_text( size = 10))

Finally, arrange the plots on a grid, using the method in Hadley Wickham's book:

grid.newpage()

pushViewport( viewport( layout = grid.layout(1,3, widths = c(.4,.2,.4))))

vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)

print(gg.female, vp = vplayout(1,1))
print(gg.ages, vp = vplayout(1,2))
print(gg.male, vp = vplayout(1,3))

alt text

Change position of labels in population pyramid (ggplot2)

This worked:

geom_text(aes(hjust = ifelse(Geschlecht == "Bock", 1, 0))


Related Topics



Leave a reply



Submit