There are many ways to visualise proportion data, one of which is using a pie chart. However, pie charts are often not ideal because it is harder to assess angles than lengths. Instead we may use a bar chart (simple or stacked) or a waffle chart. In this post I show you how to make each of these in R.
Data preparation
I use a dataset, HairEyeColor
, from the datasets
package. The dataset contains the hair and eye colour of 592 statistics students.
Load the dataset.
students <-
datasets:: HairEyeColor[, , 'Male' ] + HairEyeColor[, , 'Female' ]
students
Eye
Hair Brown Blue Hazel Green
Black 68 20 15 5
Brown 119 84 54 29
Red 26 17 14 14
Blond 7 94 10 16
Load the required packages.
# data preparation
library (dplyr)
# data visualisation
library (ggplot2)
# symbols in waffle chart
library (sysfonts)
library (showtextdb)
library (showtext)
# showing plots side by side
library (patchwork)
Compute the eye colour and hair colour proportions.
eye_colour <- colSums (students) / sum (students)
hair_colour <- rowSums (students) / sum (students)
Turn the data into data.frame
objects.
df_eye_colour <-
data.frame (
colour = names (eye_colour),
proportion = eye_colour,
row.names = NULL
) |>
arrange (desc (proportion))
df_hair_colour <-
data.frame (
colour = names (hair_colour),
proportion = hair_colour,
row.names = NULL
) |>
arrange (desc (proportion))
Specify suitable colours for visualisations.
cols1 <- list (
'Brown' = '#7E6A51' ,
'Blue' = '#ADD8E6' ,
'Hazel' = '#B8A98F' ,
'Green' = '#6fbd6f'
)
cols2 <- list (
'Black' = 'black' ,
'Brown' = '#7E6A51' ,
'Red' = '#905A55' ,
'Blond' = '#C9B481'
)
Simple bar chart
Avoid unnecessary elements. For example, vertical grid lines would not add any useful information, but could be distracting.
p1 <-
ggplot (df_eye_colour,
aes (forcats:: fct_reorder (colour, proportion), proportion, fill = colour)) +
geom_col (color = 'black' ) +
scale_fill_manual (values = cols1) +
scale_y_continuous (labels = scales:: label_percent (1 )) +
labs (x = '' , y = '' , caption = 'Eye colour prevalence among statistics students' ) +
theme_bw () +
theme (
legend.position = 'none' ,
panel.grid.major.x = element_blank (),
panel.grid.minor.x = element_blank ()
)
p2 <-
ggplot (df_hair_colour,
aes (forcats:: fct_reorder (colour, proportion), proportion, fill = colour)) +
geom_col (color = 'black' ) +
scale_fill_manual (values = cols2) +
scale_y_continuous (labels = scales:: label_percent (1 )) +
labs (x = '' , y = '' , caption = 'Hair colour prevalence among statistics students' ) +
theme_bw () +
theme (
legend.position = 'none' ,
panel.grid.major.x = element_blank (),
panel.grid.minor.x = element_blank ()
)
p1 + p2
Stacked bar chart
styling <- theme_void () +
theme (
legend.position = 'none' ,
axis.ticks.y.left = element_line (),
axis.ticks.length.y.left = unit (1 , 'mm' ),
axis.text.y.left = element_text ()
)
p1 <-
ggplot (df_eye_colour,
aes (
x = 0 ,
y = proportion,
fill = forcats:: fct_reorder (colour, proportion)
)) +
geom_col (position = 'stack' , color = 'black' ) +
geom_text (aes (label = colour),
position = position_stack (vjust = .5 )) +
scale_fill_manual (values = cols1) +
scale_y_continuous (labels = scales:: label_percent (1 )) +
styling +
labs (caption = 'Eye colour prevalence among statistics students' )
p2 <-
ggplot (df_hair_colour,
aes (
x = 0 ,
y = proportion,
fill = forcats:: fct_reorder (colour, proportion)
)) +
geom_col (position = 'stack' , color = 'black' ) +
geom_text (aes (label = colour),
colour = 'white' ,
position = position_stack (vjust = .5 )) +
scale_fill_manual (values = cols2) +
scale_y_continuous (labels = scales:: label_percent (1 )) +
styling +
labs (caption = 'Hair colour prevalence among statistics students' )
p1 + p2
Waffle chart
Rounded percentages may not sum to exactly 100, which is an issue for a waffle chart. For example, consider the following.
sum (round (hair_colour* 100 ))
To resolve this, you may use the largest remainder method. My implementation of the largest remainder method is the following.
eye_colour_waffle <- df_eye_colour |>
mutate (
remainder = proportion * 100 - floor (proportion * 100 ),
floored = floor (proportion * 100 )
) |>
arrange (desc (remainder)) |>
mutate (number = ifelse (100 - sum (floored) >= row_number (), floored + 1 , floored)) |>
arrange (proportion)
hair_colour_waffle <- df_hair_colour |>
mutate (
remainder = proportion * 100 - floor (proportion * 100 ),
floored = floor (proportion * 100 )
) |>
arrange (desc (remainder)) |>
mutate (number = ifelse (100 - sum (floored) >= row_number (), floored + 1 , floored)) |>
arrange (proportion)
My implementation of a 10 by 10 waffle chart is the following.
font_add (family = "FontAwesome" , regular = "fa-solid-900.ttf" )
waffle_plot <- function (number, colour, colour_palette, symbol, symbol_size= 8 ) {
p <- expand.grid (x = 0 : 9 ,
y = 0 : 9 ) %>%
rowwise () |>
mutate (index = 1 + sum (x * 10 + y >= cumsum (number)),
col = colour[[index]]) |>
ggplot (aes (x, y, color = forcats:: fct_inorder (col))) +
geom_text (label = symbol,
family = 'FontAwesome' ,
size = symbol_size) +
scale_color_manual (values = colour_palette) +
coord_equal () +
theme_void () +
labs (color = '' ) +
theme (
legend.position = 'top' ,
legend.margin = margin (1 , 3 , 1 , 1 , unit = 'mm' ),
plot.margin = margin (3 ,3 ,3 ,3 ,unit = 'mm' ),
legend.background = element_rect (fill = 'grey100' , color = 'grey' )
)
return (p)
}
Use the above function to visualise eye colour and hair colour as waffle charts.
showtext_auto (T)
p1 <- waffle_plot (number = eye_colour_waffle$ number,
colour = eye_colour_waffle$ colour,
colour_palette = cols1,
symbol = '\uf06e' , symbol_size= 6 ) +
labs (caption= 'Eye colour prevalence among statistics students' )
p2 <- waffle_plot (number = hair_colour_waffle$ number,
colour = hair_colour_waffle$ colour,
colour_palette = cols2,
symbol = '\uf0c8' , symbol_size= 7 ) +
labs (caption= 'Hair colour prevalence among statistics students' )
p1 + p2