Great Looking Waffle Charts in R

R
Data-visualisation
Author

Filip Reierson

Published

June 17, 2023

There are many ways to visualise proportion data, one of which is using a pie chart. However, pie charts are often not ideal because it is harder to assess angles than lengths. Instead we may use a bar chart (simple or stacked) or a waffle chart. In this post I show you how to make each of these in R.

Data preparation

I use a dataset, HairEyeColor, from the datasets package. The dataset contains the hair and eye colour of 592 statistics students.

Load the dataset.

students <-
  datasets::HairEyeColor[, , 'Male'] + HairEyeColor[, , 'Female']
students
       Eye
Hair    Brown Blue Hazel Green
  Black    68   20    15     5
  Brown   119   84    54    29
  Red      26   17    14    14
  Blond     7   94    10    16

Load the required packages.

# data preparation
library(dplyr)
# data visualisation
library(ggplot2)
# symbols in waffle chart
library(sysfonts)
library(showtextdb)
library(showtext)
# showing plots side by side
library(patchwork)

Compute the eye colour and hair colour proportions.

eye_colour <- colSums(students) / sum(students)
hair_colour <- rowSums(students) / sum(students)

Turn the data into data.frame objects.

df_eye_colour <-
  data.frame(
    colour = names(eye_colour),
    proportion = eye_colour,
    row.names = NULL
  ) |>
  arrange(desc(proportion))
df_hair_colour <-
  data.frame(
    colour = names(hair_colour),
    proportion = hair_colour,
    row.names = NULL
  ) |>
  arrange(desc(proportion))

Specify suitable colours for visualisations.

cols1 <- list(
  'Brown' = '#7E6A51',
  'Blue' = '#ADD8E6',
  'Hazel' = '#B8A98F',
  'Green' = '#6fbd6f'
)
cols2 <- list(
  'Black' = 'black',
  'Brown' = '#7E6A51',
  'Red' = '#905A55',
  'Blond' = '#C9B481'
)

Simple bar chart

Avoid unnecessary elements. For example, vertical grid lines would not add any useful information, but could be distracting.

p1 <-
  ggplot(df_eye_colour,
         aes(forcats::fct_reorder(colour, proportion), proportion, fill = colour)) +
  geom_col(color = 'black') +
  scale_fill_manual(values = cols1) +
  scale_y_continuous(labels = scales::label_percent(1)) +
  labs(x = '', y = '', caption = 'Eye colour prevalence among statistics students') +
  theme_bw() +
  theme(
    legend.position = 'none',
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
p2 <-
  ggplot(df_hair_colour,
         aes(forcats::fct_reorder(colour, proportion), proportion, fill = colour)) +
  geom_col(color = 'black') +
  scale_fill_manual(values = cols2) +
  scale_y_continuous(labels = scales::label_percent(1)) +
  labs(x = '', y = '', caption = 'Hair colour prevalence among statistics students') +
  theme_bw() +
  theme(
    legend.position = 'none',
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
p1 + p2

Stacked bar chart

styling <- theme_void() +
  theme(
    legend.position = 'none',
    axis.ticks.y.left = element_line(),
    axis.ticks.length.y.left = unit(1, 'mm'),
    axis.text.y.left = element_text()
  )
p1 <-
  ggplot(df_eye_colour,
         aes(
           x = 0,
           y = proportion,
           fill = forcats::fct_reorder(colour, proportion)
         )) +
  geom_col(position = 'stack', color = 'black') +
  geom_text(aes(label = colour),
            position = position_stack(vjust = .5)) +
  scale_fill_manual(values = cols1) +
  scale_y_continuous(labels = scales::label_percent(1)) +
  styling +
  labs(caption = 'Eye colour prevalence among statistics students')
p2 <-
  ggplot(df_hair_colour,
         aes(
           x = 0,
           y = proportion,
           fill = forcats::fct_reorder(colour, proportion)
         )) +
  geom_col(position = 'stack', color = 'black') +
  geom_text(aes(label = colour),
            colour = 'white',
            position = position_stack(vjust = .5)) +
  scale_fill_manual(values = cols2) +
  scale_y_continuous(labels = scales::label_percent(1)) +
  styling +
  labs(caption = 'Hair colour prevalence among statistics students')
p1 + p2

Waffle chart

Rounded percentages may not sum to exactly 100, which is an issue for a waffle chart. For example, consider the following.

sum(round(hair_colour*100))
[1] 99

To resolve this, you may use the largest remainder method. My implementation of the largest remainder method is the following.

eye_colour_waffle <- df_eye_colour |>
  mutate(
    remainder = proportion * 100 - floor(proportion * 100),
    floored = floor(proportion * 100)
  ) |>
  arrange(desc(remainder)) |>
  mutate(number = ifelse(100 - sum(floored) >= row_number(), floored + 1, floored)) |>
  arrange(proportion)
hair_colour_waffle <- df_hair_colour |>
  mutate(
    remainder = proportion * 100 - floor(proportion * 100),
    floored = floor(proportion * 100)
  ) |>
  arrange(desc(remainder)) |>
  mutate(number = ifelse(100 - sum(floored) >= row_number(), floored + 1, floored)) |>
  arrange(proportion)

My implementation of a 10 by 10 waffle chart is the following.

font_add(family = "FontAwesome", regular = "fa-solid-900.ttf")
waffle_plot <- function(number, colour, colour_palette, symbol, symbol_size=8) {
  p <- expand.grid(x = 0:9,
                   y = 0:9) %>%
    rowwise() |>
    mutate(index = 1+sum(x * 10 + y >= cumsum(number)),
           col = colour[[index]]) |>
    ggplot(aes(x, y, color = forcats::fct_inorder(col))) +
    geom_text(label = symbol,
              family = 'FontAwesome',
              size = symbol_size) +
    scale_color_manual(values = colour_palette) +
    coord_equal() +
    theme_void() +
    labs(color = '') +
    theme(
      legend.position = 'top',
      legend.margin = margin(1, 3, 1, 1, unit = 'mm'),
      plot.margin = margin(3,3,3,3,unit = 'mm'),
      legend.background = element_rect(fill = 'grey100', color = 'grey')
    )
  return(p)
}

Use the above function to visualise eye colour and hair colour as waffle charts.

showtext_auto(T)
p1 <- waffle_plot(number = eye_colour_waffle$number,
            colour = eye_colour_waffle$colour,
            colour_palette = cols1,
            symbol = '\uf06e', symbol_size=6) +
  labs(caption='Eye colour prevalence among statistics students')
p2 <- waffle_plot(number = hair_colour_waffle$number,
            colour = hair_colour_waffle$colour,
            colour_palette = cols2,
            symbol = '\uf0c8', symbol_size=7) +
  labs(caption='Hair colour prevalence among statistics students') 
p1 + p2