-
Notifications
You must be signed in to change notification settings - Fork 0
/
3-starwars_opinions_final.Rmd
146 lines (90 loc) · 3.13 KB
/
3-starwars_opinions_final.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
---
title: "Star Wars Opinion Data"
author: "Angela Zoss"
date: "8/14/19"
output: html_document
---
## Set up your environment
```{r}
# Load required libraries
library(tidyverse)
```
## Load your data
```{r}
# Data source:
# https://fivethirtyeight.com/features/americas-favorite-star-wars-movies-and-least-favorite-characters/
# Note: the CSV has two rows of headers, so both are skipped on read and a
# manually created list of headers (StarWarsNames.csv) is applied afterwards.
new_names <- read_csv("data/StarWarsNames.csv") %>%
  dplyr::select(NewNames)
starwars_opins <- read_csv("data/StarWars.csv", skip = 2, col_names = FALSE) %>%
  setNames(new_names$NewNames)

# Keep only the respondent id and the opinion columns, then reshape them to
# long format (one row per respondent/character answer).
# pivot_longer() replaces the superseded gather(); names_prefix strips the
# leading "Opinion" from each column name, so Character holds just the
# character label. Rows containing a missing value are dropped.
starwars_opins_tidy <- starwars_opins %>%
  dplyr::select(RespondentID, starts_with("Opinion")) %>%
  pivot_longer(
    cols = -RespondentID,
    names_to = "Character",
    values_to = "Opinion",
    names_prefix = "Opinion"
  ) %>%
  na.omit()

# Finally, create three separate datasets: Han Solo, Jar Jar, and combined.
solo <- starwars_opins_tidy %>%
  dplyr::filter(Character == "Solo")
jarjar <- starwars_opins_tidy %>%
  dplyr::filter(Character == "JarJar")
combined <- bind_rows(solo, jarjar)
```
## Plot the opinions for Han Solo
```{r}
# Hint: one geom takes a categorical variable and counts the data points
# that fall into each category.
ggplot(data = solo, mapping = aes(x = Opinion)) +
  geom_bar()
```
## Plot the opinions for Jar Jar Binks
```{r}
# Same bar chart of opinion counts, this time for the Jar Jar Binks subset.
ggplot(data = jarjar, mapping = aes(x = Opinion)) +
  geom_bar()
```
## Edit the axes of each of these charts to make them comparable
```{r}
# Remember: editing an axis usually requires a scale.
# Both charts get the same y-axis limits (0 to 610) so their bar heights can
# be compared directly.
ggplot(data = solo, mapping = aes(x = Opinion)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 610))

ggplot(data = jarjar, mapping = aes(x = Opinion)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 610))
```
## Advanced: can you think of another way to standardize the axes?
```{r}
# Draw both characters from the combined dataset, one panel per Character.
# facet_wrap() uses fixed (shared) scales across panels by default.
ggplot(data = combined, mapping = aes(x = Opinion)) +
  geom_bar() +
  facet_wrap(facets = vars(Character))
```
## Reorder the opinion levels so they are in a logical order
```{r}
# Opinion categories in the order they should appear on the axis:
# unfamiliar first, then from least to most favorable.
opinion.levels <- c(
  "Unfamiliar (N/A)",
  "Very unfavorably",
  "Somewhat unfavorably",
  "Neither favorably nor unfavorably (neutral)",
  "Somewhat favorably",
  "Very favorably"
)

# Turn Opinion into a factor whose levels follow opinion.levels, so the bars
# are drawn in that order instead of alphabetically.
combined.f <- combined %>%
  mutate(Opinion = factor(Opinion, levels = opinion.levels))

ggplot(data = combined.f, mapping = aes(x = Opinion)) +
  geom_bar() +
  facet_wrap(facets = vars(Character))
```
## Flip the axes so the opinion levels are on the y-axis
```{r}
# Same faceted chart, with the coordinates flipped so the opinion levels
# run along the y-axis and the counts along the x-axis.
ggplot(data = combined.f, mapping = aes(x = Opinion)) +
  geom_bar() +
  facet_wrap(facets = vars(Character)) +
  coord_flip()
```
## Use a different color for each opinion level
```{r}
# Map fill to Opinion so each level gets its own color. The legend is hidden
# because the axis labels already name every level.
ggplot(data = combined.f, mapping = aes(x = Opinion, fill = Opinion)) +
  geom_bar(show.legend = FALSE) +
  facet_wrap(facets = vars(Character)) +
  coord_flip()
```
## Customize the color palette
```{r}
# Hint: editing colors requires a change of scale.
# Search for "R colors" to find a list of all named colors.
# One color per opinion level, listed in the same order as the factor levels:
# grey for unfamiliar/neutral, reds for unfavorable, blues for favorable.
opinion.colors <- c(
  "grey50",
  "firebrick4",
  "firebrick1",
  "grey85",
  "dodgerblue1",
  "dodgerblue4"
)

ggplot(data = combined.f, mapping = aes(x = Opinion, fill = Opinion)) +
  geom_bar(show.legend = FALSE) +
  facet_wrap(facets = vars(Character)) +
  coord_flip() +
  scale_fill_manual(values = opinion.colors) +
  theme_minimal()
```