Engaging and Beautiful Data Visualizations with ggplot2

Working with Colors

Cédric Scherer // posit::conf // September 2023

Setup

library(ggplot2)
library(dplyr)

# Load the bike-sharing data from data/london-bikes-custom.csv; the path is
# built with here::here().
# `col_types` is readr's compact column specification, one letter per column
# (14 here): D = date, c = character, f = factor, i = integer, l = logical,
# d = double.
bikes <- readr::read_csv(
  here::here("data", "london-bikes-custom.csv"), 
  col_types = "Dcfffilllddddc"
)

# Make theme_light() the default theme for all following plots.
# NOTE(review): presumably the "Asap SemiCondensed" font has to be installed
# and available to the graphics device -- confirm on the rendering machine.
theme_set(theme_light(base_size = 14, base_family = "Asap SemiCondensed"))

# Adjust the active theme: drop minor grid lines, use a bold title and
# position the title relative to the whole plot.
theme_update(
  panel.grid.minor = element_blank(),
  plot.title = element_text(face = "bold"),
  plot.title.position = "plot"
)

Default Color Palettes: Categorical

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot()

Default Color Palettes: Sequential

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count, 
           color = humidity)) +
  geom_point(alpha = .7)

Pre-Defined
Color Palettes

The Viridis Color Palettes

Figure 1 in Nuñez, Anderton & Renslow (2018) PLoS One: Example of a misleading colormap. Comparison between different colormaps overlaid onto the test image by Kovesi and a nanoscale secondary ion mass spectrometry image. Colormaps are as follows: (a) perceptually uniform grayscale, (b) jet, (c) jet as it appears to someone with red-green colorblindness, and (d) viridis [1], the current gold standard colormap. Below each NanoSIMS image is a corresponding “colormap-data perceptual sensitivity” (CDPS) plot, which compares perceptual differences of the colormap to actual, underlying data differences. m is the slope of the fitted line and r² is the coefficient of determination calculated using a simple linear regression. An example of how the data may be misinterpreted are evident in the bright yellow spots in (b) and (c), which appear to represent significantly higher values than the surrounding regions. However, in fact, the dark red (in b) and dark yellow (in c) actually represent the highest values. For someone who is red-green colorblind, this is made even more difficult to interpret due to the broad, bright band in the center of the colormap with values that are difficult to distinguish.

Nuñez, Anderton & Renslow (2018), PLoS One 13:e0199239. DOI: 10.1371/journal.pone.0199239

The Viridis Color Palettes

A comparison of viridis palettes against other commonly used color palettes in R including the ggplot2 default, jet and ColorBrewer palettes.

Palette comparison of commonly used sequential color palettes in R from the {viridis} vignette

The Viridis Color Palettes

The same comparison of commonly used color palettes in R, simulated as seen by a colorblind person with Deuteranopia.

Palette comparison, as seen by a person with Deuteranopia, from the {viridis} vignette

Pre-Defined Color Palettes: Viridis

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_viridis_d()

Pre-Defined Color Palettes: Viridis

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_viridis_d(
    begin = .3,
    end = .9
  )

Pre-Defined Color Palettes: Viridis

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_viridis_c()

Pre-Defined Color Palettes: Viridis

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_viridis_c(
    option = "cividis"
  )

Pre-Defined Color Palettes: Viridis

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_viridis_c(
    option = "cividis",
    end = .95,
    direction = -1
  )

Pre-Defined Color Palettes: ColorBrewer

A screenshot of the online tool to test ColorBrewer palettes, which are designed to be used for map purposes. Thus, the tool showcases the colors on a map showing an area in the Southeast of the US.

The ColorBrewer project: colorbrewer2.org

Pre-Defined Color Palettes: ColorBrewer

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_brewer()

Pre-Defined Color Palettes: ColorBrewer

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_distiller()

Pre-Defined Color Palettes: ColorBrewer

RColorBrewer::display.brewer.all()

Pre-Defined Color Palettes: ColorBrewer

RColorBrewer::display.brewer.all(colorblindFriendly = TRUE)

Pre-Defined Color Palettes: ColorBrewer

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_brewer(
    palette = "Set1"
  )

Pre-Defined Color Palettes: ColorBrewer

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_distiller(
    palette = "YlOrRd",
    direction = 1
  )

Palette Packages

{rcartocolor}

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  rcartocolor::scale_fill_carto_d()

{rcartocolor}

rcartocolor::display_carto_all()

{rcartocolor}

rcartocolor::display_carto_all(colorblind_friendly = TRUE)

{rcartocolor}

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  rcartocolor::scale_fill_carto_d(
    palette = "Safe"
  )

{scico}

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scico::scale_color_scico()

{scico}

scico::scico_palette_show()

{scico}

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scico::scale_color_scico(
    palette = "brocO",
    direction = -1
  )

{ggsci} and {ggthemes}

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  ggsci::scale_fill_npg()

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  ggthemes::scale_fill_gdocs()

{MetBrewer}

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  MetBrewer::scale_fill_met_d(
    name = "Klimt"
  )

{MetBrewer}

MetBrewer::display_all()

{MetBrewer}

MetBrewer::display_all(colorblind_only = TRUE)

{MetBrewer}

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  MetBrewer::scale_color_met_c(
    name = "Hiroshige" 
  )

Color Palette Quality

Evaluate HCL Spectrum

More on how to evaluate color palettes: HCLwizard and colorspace.

colorspace::specplot(
  colorspace::diverging_hcl(
    n = 100, palette = "Blue-Red"
  )
)

colorspace::specplot(
  MetBrewer::met.brewer(
     n = 100, name = "Hiroshige"
  )
)

Evaluate HCL Spectrum

More on how to evaluate color palettes: HCLwizard and colorspace.

colorspace::specplot(
  MetBrewer::met.brewer(
     n = 100, name = "Cassatt2"
  )
)

colorspace::specplot(
  MetBrewer::met.brewer(
     n = 100, name = "Veronese"
  )
)

Evaluate HCL Spectrum

More on how to evaluate color palettes: HCLwizard and colorspace.

colorspace::specplot(
  viridis::viridis(
    n = 100, direction = -1
  )
)

colorspace::specplot(
  MetBrewer::met.brewer(
     n = 100, name = "Hokusai3"
  )
)

Evaluate HCL Spectrum

More on how to evaluate color palettes: HCLwizard and colorspace.

colorspace::specplot(
  rainbow(
    n = 100
  )
)

colorspace::specplot(
  viridis::turbo(
     n = 100, direction = -1
  )
)

Customize Palettes

Customize Existing Palettes

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  rcartocolor::scale_fill_carto_d(
    palette = "Vivid" 
  )

Customize Existing Palettes

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_manual(
    values = rcartocolor::carto_pal(
      name = "Vivid", n = 4
    )
  )

Customize Existing Palettes

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_manual(
    values = rcartocolor::carto_pal(
      name = "Vivid", n = 5
    )[1:4]
  )

Customize Existing Palettes

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_manual(
    values = rcartocolor::carto_pal(
      name = "Vivid", n = 7
    )[c(2, 6, 1, 3)]
  )

Customize Existing Palettes

carto_custom <- 
  rcartocolor::carto_pal(
    name = "Vivid", n = 7
  )[c(2, 6, 1, 3)]

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_manual(
    values = carto_custom
  )

Customize Existing Palettes

library(prismatic)

carto_light <- clr_lighten(
  carto_custom, .7
)

ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_manual(
    values = carto_light
  )

Customize Existing Palettes

# Boxplots whose fill is meant to be a lightened version of the scale colour.
# NOTE(review): `fill` is supplied twice inside this aes() call -- once as the
# data mapping and once as the after_scale() modifier. ggplot2 is expected to
# warn about duplicated aesthetics for such a mapping (verify with the
# installed version); `stage(season, after_scale = ...)` expresses the same
# intent with a single `fill` mapping.
ggplot(bikes, 
       aes(x = day_night, y = count)) +
  geom_boxplot(
    aes(fill = season,
        fill = after_scale(
          clr_lighten(fill, .7)
    ))
  ) +
  scale_fill_manual(
    values = carto_custom
  )

Customize Existing Palettes

ggplot(bikes, 
       aes(x = day_night, y = count)) +
  geom_boxplot(
    aes(fill = stage(
      season, 
      after_scale = clr_lighten(fill, .7)
    ))
  ) +
  scale_fill_manual(
    values = carto_custom
  )

Customize Existing Palettes

ggplot(bikes, 
       aes(x = day_night, y = count)) +
  geom_boxplot(
    aes(color = season,
        fill = after_scale(
          clr_lighten(color, .7)
    ))
  ) +
  scale_color_manual(
    values = carto_custom
  )

Customize Existing Palettes

# Boxplots with a lightened fill plus jittered raw points in a darkened tone,
# all derived from one manual colour scale.
ggplot(bikes,
       aes(x = day_night, y = count)) +
  geom_boxplot(
    # the outline takes the scale colour; the fill is derived from it
    # after scaling
    aes(color = season,
        fill = after_scale(
          clr_lighten(color, .7)
    )),
    # hide the boxplot outliers: the jitter layer below draws every point
    outlier.shape = NA
  ) +
  geom_jitter(
    # stage() maps `season` to colour first and darkens the scaled colour
    # afterwards, instead of supplying the `color` aesthetic twice
    aes(color = stage(
      season,
      after_scale = clr_darken(color, .4)
    )),
    position = position_jitterdodge(
      dodge.width = .75,
      jitter.width = .2
    ),
    alpha = .3, size = .6
  ) +
  scale_color_manual(
    values = carto_custom
  ) +
  theme(legend.position = "top")

Create New Palettes

Create Sequential Palettes

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_gradient(
    low = "#1D785A",
    high = "#FFCE52"
  )

Create Diverging Palettes

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_gradient2(
    low = "#663399",
    high = "#993334",
    mid = "grey85"
  )

Create Diverging Palettes

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_gradient2(
    low = "#663399",
    high = "#993334",
    mid = "grey85",
    midpoint = mean(bikes$humidity)    
  )

Create Diverging Palettes

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count, 
           color = temp)) +
  geom_point(alpha = .7) +
  scale_color_gradient2(
    low = "#663399",
    high = "#993334",
    mid = "grey85",
    midpoint = 10,
    limits = c(-10, 30)
  )

Evaluate HCL Spectrum

More on how to evaluate color palettes: HCLwizard and colorspace.

colorspace::specplot(
  colorRampPalette(
    c("#1D785A", "#FFCE52")
  )(100)
)

colorspace::specplot(
  colorRampPalette(
    c("#663399", "grey85", "#993334")
  )(100)
)

Create Any Palette

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_gradientn(
    colors = carto_custom  
  )

Create Any Palette

pal_c <- 
  colorRampPalette(
    c("#663399", "grey85", "#993334")
  )(5)

plot(color(pal_c))

Create Any Palette

# Diverging gradient with the colour changes pushed towards both ends of the
# humidity range.
ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count,
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_gradientn(
    colors = pal_c,
    # one position (between 0 and 1) per colour: `pal_c` holds five colours,
    # so exactly five positions are given
    values = c(0, .1, .5, .9, 1)
  )

Build Your Own
Color Scales

“Black Lives 1900: W. E. B. Du Bois at the Paris Exposition” reprints some of the striking photographs and graphics that Du Bois and his curators commissioned for the World’s Fair, here the colorful stacked bar chart on income and expenditure of 150 negro families in Atlanta.

Illustration by W. E. B. Du Bois, Courtesy Library of Congress

Build Custom Color Scales

#' Access the Du Bois colours by name or position
#'
#' @param ... Colour names (character) or positions (numeric) to extract,
#'   given as separate arguments or as vectors. If nothing is supplied,
#'   the complete colour set is returned.
#' @return A named character vector of hex colour codes.
dubois_colors <- function(...) {

  # define colors
  dubois_cols <- c(
    black    = "#000000",
    purple   = "#582f6c",
    violet   = "#94679C",
    pink     = "#ef849f",
    softred  = "#f4b7a7",
    iceblue  = "#bccbf3",
    palegrey = "#e4e4e4"
  )

  # if no colors are specified, return all
  cols <- c(...)
  if (is.null(cols)) {
    return(dubois_cols)
  }

  # fail early on unknown colors instead of silently returning NA entries
  if (is.character(cols)) {
    unknown <- setdiff(cols, names(dubois_cols))
    if (length(unknown) > 0) {
      stop(
        "Unknown color name(s): ", paste(unknown, collapse = ", "), ". ",
        "Available colors: ", paste(names(dubois_cols), collapse = ", "), "."
      )
    }
  } else if (is.numeric(cols)) {
    if (anyNA(cols) || any(abs(cols) > length(dubois_cols))) {
      stop("Color positions must be between 1 and ", length(dubois_cols), ".")
    }
  }

  # if colors are specified, return those
  dubois_cols[cols]
}

Build Custom Color Scales

plot(color(dubois_colors()))

Build Custom Color Scales

plot(color(dubois_colors("black", "violet", "softred", "iceblue", "palegrey")))

Build Custom Color Scales: Categorical

#' Categorical Du Bois palette generator
#'
#' @param palette One of "default", "dark" or "light".
#' @param reverse Single logical or numeric flag; if true, the order of the
#'   returned colours is reversed.
#' @return A function of `n` (number of colours, at most 5) that returns an
#'   unnamed character vector with the first `n` colours of the palette,
#'   so it can be used as `dubois_pal_d()(n)`.
dubois_pal_d <- function(palette = "default", reverse = FALSE) {

  # check arguments once, when the palette function is created, rather than
  # on every call of the returned function
  if (length(palette) != 1L || !palette %in% c("default", "dark", "light")) {
    stop('palette should be "default", "dark" or "light".')
  }
  if (!(is.logical(reverse) || is.numeric(reverse)) ||
      length(reverse) != 1L || is.na(reverse)) {
    stop('reverse should be logical or numeric')
  }

  # define palette styles and keep an unnamed vector of color codes
  pal <- switch(
    palette,
    default = dubois_colors("black", "violet", "softred", "iceblue", "palegrey"),
    dark    = dubois_colors(1:5),
    light   = dubois_colors(3:7)
  )
  pal <- unname(pal)

  # nested function to return colors via `dubois_pal_d()(n)`
  function(n) {

    # check if number of colors is sufficient
    if (n > 5) stop('Palettes only contain 5 colors')

    # seq_len() yields an empty selection for n = 0, whereas 1:n would
    # select the first color
    cols <- pal[seq_len(n)]

    # check reverse argument
    if (reverse) rev(cols) else cols
  }
}

Build Custom Color Scales: Categorical

plot(color(dubois_pal_d()(5)))

Build Custom Color Scales: Categorical

plot(color(dubois_pal_d(palette = "dark", reverse = TRUE)(5)))

Build Custom Color Scales: Categorical

#' Discrete Du Bois colour scale for ggplot2
#'
#' @param palette One of "default", "dark" or "light".
#' @param reverse Single logical or numeric flag; reverse the palette order?
#' @param ... Further arguments passed on to `ggplot2::discrete_scale()`.
#' @return The scale object created by `ggplot2::discrete_scale()` for the
#'   "colour" aesthetic.
scale_color_dubois_d <- function(palette = "default", reverse = FALSE, ...) {

  # check arguments; the length and NA checks make vectors, NULL and NA fail
  # with the intended message instead of an unrelated `if` error later on
  if (length(palette) != 1L || !palette %in% c("default", "dark", "light")) {
    stop('palette should be "default", "dark" or "light".')
  }
  if (!(is.logical(reverse) || is.numeric(reverse)) ||
      length(reverse) != 1L || is.na(reverse)) {
    stop('reverse should be logical or numeric')
  }

  # retrieve color set
  pal <- dubois_pal_d(palette = palette, reverse = reverse)

  # apply to discrete scale
  # NOTE(review): the second argument is `scale_name`, which ggplot2 >= 3.5.0
  # reportedly deprecates -- verify against the installed version before
  # dropping it.
  ggplot2::discrete_scale("colour", paste0("dubois_", palette), palette = pal, ...)
}

Build Custom Color Scales: Categorical

#' Discrete Du Bois colour scale for ggplot2
#'
#' @param palette One of "default", "dark" or "light".
#' @param reverse Single logical or numeric flag; reverse the palette order?
#' @param ... Further arguments passed on to `ggplot2::discrete_scale()`.
#' @return The scale object created by `ggplot2::discrete_scale()` for the
#'   "colour" aesthetic.
scale_color_dubois_d <- function(palette = "default", reverse = FALSE, ...) {

  # check arguments; the length and NA checks make vectors, NULL and NA fail
  # with the intended message instead of an unrelated `if` error later on
  if (length(palette) != 1L || !palette %in% c("default", "dark", "light")) {
    stop('palette should be "default", "dark" or "light".')
  }
  if (!(is.logical(reverse) || is.numeric(reverse)) ||
      length(reverse) != 1L || is.na(reverse)) {
    stop('reverse should be logical or numeric')
  }

  # retrieve color set
  pal <- dubois_pal_d(palette = palette, reverse = reverse)

  # apply to discrete scale
  # NOTE(review): the second argument is `scale_name`, which ggplot2 >= 3.5.0
  # reportedly deprecates -- verify against the installed version before
  # dropping it.
  ggplot2::discrete_scale("colour", paste0("dubois_", palette), palette = pal, ...)
}

#' Discrete Du Bois fill scale for ggplot2
#'
#' @param palette One of "default", "dark" or "light".
#' @param reverse Single logical or numeric flag; reverse the palette order?
#' @param ... Further arguments passed on to `ggplot2::discrete_scale()`.
#' @return The scale object created by `ggplot2::discrete_scale()` for the
#'   "fill" aesthetic.
scale_fill_dubois_d <- function(palette = "default", reverse = FALSE, ...) {

  # check arguments; the length and NA checks make vectors, NULL and NA fail
  # with the intended message instead of an unrelated `if` error later on
  if (length(palette) != 1L || !palette %in% c("default", "dark", "light")) {
    stop('palette should be "default", "dark" or "light".')
  }
  if (!(is.logical(reverse) || is.numeric(reverse)) ||
      length(reverse) != 1L || is.na(reverse)) {
    stop('reverse should be logical or numeric')
  }

  # retrieve color set
  pal <- dubois_pal_d(palette = palette, reverse = reverse)

  # apply to discrete scale
  # NOTE(review): the second argument is `scale_name`, which ggplot2 >= 3.5.0
  # reportedly deprecates -- verify against the installed version before
  # dropping it.
  ggplot2::discrete_scale("fill", paste0("dubois_", palette), palette = pal, ...)
}

Use Your Custom Color Scales: Categorical

ggplot(bikes, 
       aes(y = weather_type, 
           fill = season)) +
  geom_bar(position = "fill") +
  scale_fill_dubois_d(
    reverse = TRUE,
    name = NULL
  ) +
  theme(legend.position = "top")

Build Custom Color Scales: Sequential

#' Sequential Du Bois palette generator
#'
#' @param palette Either "dark" or "light".
#' @param reverse Single logical or numeric flag; reverse the colour order?
#' @param ... Further arguments passed on to `grDevices::colorRampPalette()`.
#' @return The function returned by `grDevices::colorRampPalette()`; calling
#'   it with `n` interpolates `n` colours along the chosen palette.
dubois_pal_c <- function(palette = "dark", reverse = FALSE, ...) {

  # check arguments; the length and NA checks make vectors, NULL and NA fail
  # with the intended message instead of an unrelated `if` error later on
  if (length(palette) != 1L || !palette %in% c("dark", "light")) {
    stop('palette should be "dark" or "light".')
  }
  if (!(is.logical(reverse) || is.numeric(reverse)) ||
      length(reverse) != 1L || is.na(reverse)) {
    stop('reverse should be logical or numeric')
  }

  # define palette styles
  dubois_palettes <- list(
    dark  = dubois_colors("black", "purple", "violet", "pink"),
    light = dubois_colors("purple", "violet", "pink", "palegrey")
  )

  # retrieve color set as unnamed vector
  pal <- unname(dubois_palettes[[palette]])

  # check reverse argument
  if (reverse) pal <- rev(pal)

  # create a color gradient with n colors
  grDevices::colorRampPalette(pal, ...)
}

Build Custom Color Scales: Sequential

plot(color(dubois_pal_c()(50)))

Build Custom Color Scales: Sequential

plot(color(dubois_pal_c(palette = "light", reverse = TRUE)(7)))

Build Custom Color Scales: Sequential

#' Continuous Du Bois colour scale for ggplot2
#'
#' @param palette Either "dark" or "light".
#' @param reverse Single logical or numeric flag; reverse the colour order?
#' @param ... Further arguments passed on to
#'   `ggplot2::scale_color_gradientn()`.
#' @return The scale object created by `ggplot2::scale_color_gradientn()`.
scale_color_dubois_c <- function(palette = "dark", reverse = FALSE, ...) {

  # check function arguments; the length and NA checks make vectors, NULL and
  # NA fail with the intended message instead of an unrelated `if` error
  if (length(palette) != 1L || !palette %in% c("dark", "light")) {
    stop('Palette should be "dark" or "light".')
  }
  if (!(is.logical(reverse) || is.numeric(reverse)) ||
      length(reverse) != 1L || is.na(reverse)) {
    stop('reverse should be logical or numeric.')
  }

  # apply color set to ggplot's gradientn scale, using 256 interpolated steps
  pal <- dubois_pal_c(palette = palette, reverse = reverse)
  ggplot2::scale_color_gradientn(colours = pal(256), ...)
}

Build Custom Color Scales: Sequential

#' Continuous Du Bois colour scale for ggplot2
#'
#' @param palette Either "dark" or "light".
#' @param reverse Single logical or numeric flag; reverse the colour order?
#' @param ... Further arguments passed on to
#'   `ggplot2::scale_color_gradientn()`.
#' @return The scale object created by `ggplot2::scale_color_gradientn()`.
scale_color_dubois_c <- function(palette = "dark", reverse = FALSE, ...) {

  # check function arguments; the length and NA checks make vectors, NULL and
  # NA fail with the intended message instead of an unrelated `if` error
  if (length(palette) != 1L || !palette %in% c("dark", "light")) {
    stop('Palette should be "dark" or "light".')
  }
  if (!(is.logical(reverse) || is.numeric(reverse)) ||
      length(reverse) != 1L || is.na(reverse)) {
    stop('reverse should be logical or numeric.')
  }

  # apply color set to ggplot's gradientn scale, using 256 interpolated steps
  pal <- dubois_pal_c(palette = palette, reverse = reverse)
  ggplot2::scale_color_gradientn(colours = pal(256), ...)
}

#' Continuous Du Bois fill scale for ggplot2
#'
#' @param palette Either "dark" or "light".
#' @param reverse Single logical or numeric flag; reverse the colour order?
#' @param ... Further arguments passed on to
#'   `ggplot2::scale_fill_gradientn()`.
#' @return The scale object created by `ggplot2::scale_fill_gradientn()`.
scale_fill_dubois_c <- function(palette = "dark", reverse = FALSE, ...) {

  # check function arguments; the length and NA checks make vectors, NULL and
  # NA fail with the intended message instead of an unrelated `if` error
  if (length(palette) != 1L || !palette %in% c("dark", "light")) {
    stop('Palette should be "dark" or "light".')
  }
  if (!(is.logical(reverse) || is.numeric(reverse)) ||
      length(reverse) != 1L || is.na(reverse)) {
    stop('reverse should be logical or numeric.')
  }

  # apply color set to ggplot's gradientn scale, using 256 interpolated steps
  pal <- dubois_pal_c(palette = palette, reverse = reverse)
  ggplot2::scale_fill_gradientn(colours = pal(256), ...)
}

Use Your Custom Color Scales: Sequential

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_dubois_c()

Use Your Custom Color Scales: Sequential

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_dubois_c(
    palette = "light",
    reverse = TRUE
  )

Evaluate HCL Spectrum

More on how to evaluate color palettes: HCLwizard and colorspace.

colorspace::specplot(
  dubois_pal_c()(100)
)

colorspace::specplot(
  dubois_pal_c(palette = "light")(100)
)

Design Colorblind-Friendly Graphics

Emulate CVD

deut <- 
  prismatic::clr_deutan(
    dubois_pal_c()(100)
  )

ggplot(filter(bikes, is_weekend == TRUE),
       aes(x = temp_feel, y = count, 
           color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_gradientn(
    colors = deut
  )

Emulate CVD

g1 <- 
  ggplot(filter(bikes, is_weekend == TRUE),
         aes(x = temp_feel, y = count, 
             color = humidity)) +
  geom_point(alpha = .7) +
  scale_color_dubois_c()

g1

Emulate CVD

colorblindr::cvd_grid(g1)

Emulate CVD

colorBlindness::cvdPlot(g1)

Emulate CVD

g2 <- 
  ggplot(bikes, 
       aes(x = day_night, y = count, 
           fill = season)) +
  geom_boxplot() +
  scale_fill_manual(
    values = carto_custom
  )

g2

Emulate CVD

colorblindr::cvd_grid(g2)

Emulate CVD

g3 <- 
  ggplot(bikes, 
         aes(x = temp_feel, y = count,
             color = season)) + 
  geom_point(
    alpha = .5, size = 1.5
  ) + 
  stat_smooth(
    aes(group = day_night),
    method = "lm", color = "black"
  ) +
  scale_color_manual(
    values = c("#6681FE", "#1EC98D", "#F7B01B", "#A26E7C")
  )

g3

Emulate CVD

colorblindr::cvd_grid(g3)

Recap

  • use categorical palettes for qualitative data
    e.g. scale_*_manual() or scale_*_brewer()
  • use sequential or diverging palettes for quantitative data
    e.g. scale_*_gradient|gradient2() or scale_*_viridis_c()
  • various packages provide palettes incl. scale_* components
    e.g. {rcartocolor}, {scico}, {ggsci} and {MetBrewer}
  • color packages return palettes as vectors that can be modified and supplied to scale_*_manual() and scale_*_gradientn()
  • use after_scale to modify and recycle color scales
  • evaluate palette quality with {colorspace} (HCL) and {colorblindr} (CVD)

Exercises

Exercise 1

  • Add colors to our bar chart from the last exercise:

Exercise 2

  • Create a plot of your choice with a sequential (non-default) color palette.
    • Inspect the HCL spectrum. Adjust the palette if needed.
    • Test the palette with regard to colorblindness. Adjust the palette if needed.
    • Save and share the graphic.