Skip to content

Latest commit

 

History

History
386 lines (281 loc) · 9.4 KB

gapminder-ggplot2-scatterplot.md

File metadata and controls

386 lines (281 loc) · 9.4 KB

gapminder-ggplot2-scatterplot.r

Jenny Bryan Mon Oct 3 23:50:31 2016

Note: this report is made by rendering an R script. So the narrative is very minimal.

library(tibble)
library(ggplot2)

Load the gapminder data package.

library(gapminder)
gapminder
## # A tibble: 1,704 × 6
##        country continent  year lifeExp      pop gdpPercap
##         <fctr>    <fctr> <int>   <dbl>    <int>     <dbl>
## 1  Afghanistan      Asia  1952  28.801  8425333  779.4453
## 2  Afghanistan      Asia  1957  30.332  9240934  820.8530
## 3  Afghanistan      Asia  1962  31.997 10267083  853.1007
## 4  Afghanistan      Asia  1967  34.020 11537966  836.1971
## 5  Afghanistan      Asia  1972  36.088 13079460  739.9811
## 6  Afghanistan      Asia  1977  38.438 14880372  786.1134
## 7  Afghanistan      Asia  1982  39.854 12881816  978.0114
## 8  Afghanistan      Asia  1987  40.822 13867957  852.3959
## 9  Afghanistan      Asia  1992  41.674 16317921  649.3414
## 10 Afghanistan      Asia  1997  41.763 22227415  635.3414
## # ... with 1,694 more rows
# data and aesthetic mappings only -- no layer yet, so nothing to plot
ggplot(
  gapminder,
  aes(x = gdpPercap, y = lifeExp)
)

# the same call with a point layer added
ggplot(
  gapminder,
  aes(x = gdpPercap, y = lifeExp)
) +
  geom_point()

# keep the layer-less plot object as `p`; this just initializes it
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)

scatterplot

# add a point layer to the stored plot object
p +
  geom_point()

log transformation ... quick and dirty

# apply log10() to the variable directly inside aes()
ggplot(
  data = gapminder,
  mapping = aes(x = log10(gdpPercap), y = lifeExp)
) +
  geom_point()

a better way to log transform

# use a log10 x scale instead of transforming the variable itself
p +
  geom_point() +
  scale_x_log10()

let's make that stick

# overwrite `p` so plots built from it afterwards carry the log10 x scale
p <- p +
  scale_x_log10()

common workflow: gradually build up the plot you want; re-define the object 'p' as you develop "keeper" commands. Convey continent by color: MAP the continent variable to the aesthetic color.

# map the continent variable to the color aesthetic of the point layer
p +
  geom_point(mapping = aes(color = continent))

## add summary(p)!
# in full detail, up to now: the whole plot written as a single ggplot() call
# (must be ggplot(), not base plot() -- only a ggplot object accepts `+ layer`)
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() + scale_x_log10()

address overplotting: SET alpha transparency and size to a value

# alpha and size are set to fixed values here, not mapped to a variable
p +
  geom_point(alpha = 1 / 3, size = 3)

add a fitted curve or line

# points plus a smoother with default settings
p +
  geom_point() +
  geom_smooth()

# thicker fitted line, drawn with se = FALSE
p +
  geom_point() +
  geom_smooth(se = FALSE, lwd = 3)

# same settings, but fitted with method = "lm"
p +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, lwd = 3)

revive our interest in continents!

# the color mapping is added to the plot itself rather than to one layer
p +
  aes(color = continent) +
  geom_point() +
  geom_smooth(se = FALSE, lwd = 3)

facetting: another way to exploit a factor

# one panel per continent
p +
  geom_point(alpha = 1 / 3, size = 3) +
  facet_wrap(~ continent)

# the same panels with a smoother layer added after the points
p +
  geom_point(alpha = 1 / 3, size = 3) +
  facet_wrap(~ continent) +
  geom_smooth(se = FALSE, lwd = 2)

exercises:

  • plot lifeExp against year

# lifeExp against year as jittered points, colored by continent
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_jitter(alpha = 1 / 3, size = 3)

  • make mini-plots, split out by continent HINT: use facet_wrap()
# jittered points split into one panel per continent (scales = "free_x");
# continent_colors is not defined in this script -- presumably it is
# supplied by the gapminder package (confirm)
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  facet_wrap(~ continent, scales = "free_x") +
  geom_jitter(alpha = 1 / 3, size = 3) +
  scale_color_manual(values = continent_colors)

# one line per country, one panel per continent, with Oceania left out.
# show.legend = FALSE hides the legend for this layer; it replaces the
# deprecated `show_guide` argument.
# country_colors is not defined in this script -- presumably it is supplied
# by the gapminder package (confirm).
ggplot(subset(gapminder, continent != "Oceania"),
       aes(x = year, y = lifeExp, group = country, color = country)) +
  geom_line(lwd = 1, show.legend = FALSE) + facet_wrap(~ continent) +
  scale_color_manual(values = country_colors) +
  #scale_color_brewer()+
  theme_bw() + theme(strip.text = element_text(size = rel(1.1)))

  • add a fitted smooth and/or linear regression, w/ or w/o facetting
# facetted jitter plot with a smoother layer added last
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  facet_wrap(~ continent, scales = "free_x") +
  geom_jitter(alpha = 1 / 3, size = 3) +
  scale_color_manual(values = continent_colors) +
  geom_smooth(lwd = 2)

  • use dplyr::filter() to plot lifeExp against year for just one country or continent
# dplyr provides filter() and makes the %>% pipe available; it has to be
# attached before the pipelines below, otherwise they fail with
# 'could not find function "%>%"'
suppressPackageStartupMessages(library(dplyr))

# one country, chosen by name, piped straight into ggplot()
jc <- "Cambodia"
gapminder %>%
  filter(country == jc) %>%
  ggplot(aes(x = year, y = lifeExp)) +
  labs(title = jc) +
  geom_line()

# the same idea in two steps: keep the filtered data, then build the plot
rwanda <- gapminder %>%
  filter(country == "Rwanda")
p <- ggplot(rwanda, aes(x = year, y = lifeExp)) +
  labs(title = "Rwanda") +
  geom_line()
print(p)

# no `plot` argument here -- presumably ggsave() falls back to the most
# recently displayed plot; confirm against the ggsave() documentation
ggsave("rwanda.pdf")
## Saving 7 x 5 in image
# same file name, but the plot object to write is passed explicitly
ggsave("rwanda.pdf",plot = p)
## Saving 7 x 5 in image
  • other ideas?

plot lifeExp against year

# store the lifeExp-against-year scatterplot as `y`, then display it
y <- ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_point()
y

make mini-plots, split out by continent

# one panel per continent
y +
  facet_wrap(~ continent)

add a fitted smooth and/or linear regression, w/ or w/o facetting

# default smoother plus an orange method = "lm" fit on the same plot
y +
  geom_smooth(se = FALSE, lwd = 2) +
  geom_smooth(method = "lm", se = FALSE, color = "orange", lwd = 2)

# default smoother, one panel per continent
y +
  geom_smooth(se = FALSE, lwd = 2) +
  facet_wrap(~ continent)

last bit on scatterplots: how can we "connect the dots" for one country, i.e. make a spaghetti plot?

# geom_line() without a group aesthetic: uh, no
y +
  facet_wrap(~ continent) +
  geom_line()

# group = country: yes!
y +
  facet_wrap(~ continent) +
  geom_line(mapping = aes(group = country))

# grouped lines with a smoother layer added last
y +
  facet_wrap(~ continent) +
  geom_line(mapping = aes(group = country)) +
  geom_smooth(se = FALSE, lwd = 2)

note about subsetting data: sadly, ggplot() does not have a 'subset =' argument, so do that 'on the fly' with subset(..., subset = ...)

# restrict the data with base subset() inside the ggplot() call
ggplot(
  data = subset(gapminder, country == "Zimbabwe"),
  mapping = aes(x = year, y = lifeExp)
) +
  geom_line() +
  geom_point()

or could do with dplyr::filter

# attach dplyr quietly, then filter the rows and pipe the result into ggplot()
suppressPackageStartupMessages(library(dplyr))
gapminder %>%
  filter(country == "Zimbabwe") %>%
  ggplot(aes(x = year, y = lifeExp)) +
  geom_line() +
  geom_point()

let's just look at four countries

# the four countries to keep; one colored line (with points) per country
jCountries <- c("Canada", "Rwanda", "Cambodia", "Mexico")
ggplot(
  data = subset(gapminder, country %in% jCountries),
  mapping = aes(x = year, y = lifeExp, color = country)
) +
  geom_line() +
  geom_point()

when you really care, make your legend easy to navigate; this means visual order = data order = factor level order

# color is mapped to country reordered by the max of -1 * lifeExp, so the
# factor level order (and with it the legend order) follows the data
ggplot(
  data = subset(gapminder, country %in% jCountries),
  mapping = aes(x = year, y = lifeExp, color = reorder(country, -1 * lifeExp, max))
) +
  geom_line() +
  geom_point()

another approach to overplotting: 2D binning with geom_bin2d()

# a geom_bin2d() layer on a log10 x scale, in place of individual points
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  scale_x_log10() +
  geom_bin2d()

# print the R version, platform, and attached/loaded package versions
utils::sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
## 
## locale:
## [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dplyr_0.5.0     gapminder_0.2.0 ggplot2_2.1.0   tibble_1.2     
## [5] knitr_1.14.2   
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.7        magrittr_1.5       munsell_0.4.3     
##  [4] colorspace_1.2-6   lattice_0.20-33    R6_2.1.3          
##  [7] stringr_1.1.0      plyr_1.8.4         tools_3.3.1       
## [10] grid_3.3.1         gtable_0.2.0       nlme_3.1-128      
## [13] mgcv_1.8-13        DBI_0.4-1          htmltools_0.3.5   
## [16] lazyeval_0.2.0     yaml_2.1.13        assertthat_0.1    
## [19] digest_0.6.10      Matrix_1.2-6       formatR_1.4       
## [22] evaluate_0.9       rmarkdown_1.0.9014 labeling_0.3      
## [25] stringi_1.1.1      scales_0.4.0