-
Notifications
You must be signed in to change notification settings - Fork 0
/
Seminar3.R
198 lines (167 loc) · 6.36 KB
/
Seminar3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
library("tidyverse")
# Load the participants survey data from GitHub.
# read_csv() downloads http(s) URLs on its own, so the path is passed
# directly instead of being wrapped in a url() connection.
urlfile <- "https://raw.githubusercontent.com/CWWhitney/teaching_R/master/participants_data.csv"
participants_data <- read_csv(urlfile)

# Change the barplot by creating a table of gender:
# table() counts the rows per gender value, barplot() draws one bar per count.
participants_barplot <- table(participants_data$gender)
barplot(participants_barplot)
# Scatterplot of days to email response (y) against the number of letters
# in the first name (x).
ggplot(participants_data) +
  geom_point(
    mapping = aes(
      x = letters_in_first_name,
      y = days_to_email_response
    )
  )

# Same scatterplot, with point colour showing the binary academic-parents
# variable and point size showing working hours per day.
ggplot(participants_data) +
  geom_point(
    mapping = aes(
      x = letters_in_first_name,
      y = days_to_email_response,
      colour = academic_parents,
      size = working_hours_per_day
    )
  )
# Iris bubble chart: petal length (y) against sepal length (x),
# coloured by species, with bubble size given by petal width.
iris %>%
  ggplot(mapping = aes(
    x = Sepal.Length,
    y = Petal.Length,
    colour = Species,
    size = Petal.Width
  )) +
  geom_point()
# Create a plot with the diamonds data of the carat (x) and the price (y).
# The transparency is a fixed setting, so it is passed to geom_point()
# rather than aes(): inside aes() the constant 0.2 is treated as a data
# value, which adds a meaningless "alpha" legend and lets the alpha scale
# (not the literal 0.2) decide the opacity of the points.
plot1 <- ggplot(
  data = diamonds,
  aes(x = carat, y = price)
) +
  geom_point(alpha = 0.2)
plot1

# Create a plot with the diamonds data of the log of carat (x) and the
# log of price (y), again with the points drawn at a fixed 0.2 opacity.
ggplot(
  data = diamonds,
  aes(x = log(carat), y = log(price))
) +
  geom_point(alpha = 0.2)
# Create a smaller diamonds data set (top 100 rows), then a scatterplot with
# carat on the x-axis, price on the y-axis and the diamond colour as the
# colour of the points.
# top_n() without `wt` silently ranks by the last column (`z` in diamonds)
# and is superseded; slice_max() selects the same rows (ties included) with
# the ranking column spelled out. The rows come back sorted by descending z.
# NOTE(review): if "top 100 rows" was meant as the first 100 rows of the
# table, slice_head(diamonds, n = 100) would be the call to use - confirm.
dsmall <- slice_max(diamonds, order_by = z, n = 100)
ggplot(
  data = dsmall,
  aes(x = carat, y = price, color = color)
) +
  geom_point()

# Create a smaller diamonds data set (top 40 rows, ranked by z as above),
# then a scatterplot with carat on the x-axis, price on the y-axis and the
# cut of the diamond as the shape of the points.
dsmall <- slice_max(diamonds, order_by = z, n = 40)
ggplot(
  data = dsmall,
  aes(x = carat, y = price, shape = cut)
) +
  geom_point()
# Diamonds scatterplot with carat on the x-axis and price on the y-axis.
# I() (the inhibit function) marks alpha = 0.1 and colour = "blue" as
# literal values to be used as-is rather than mapped through a scale.
ggplot(diamonds) +
  geom_point(
    mapping = aes(
      x = carat,
      y = price,
      alpha = I(0.1),
      colour = I("blue")
    )
  )
# Create a smaller data set of diamonds with 50 rows.
# top_n() without `wt` silently ranks by the last column (`z` in diamonds)
# and is superseded; slice_max() selects the same rows (ties included) with
# the ranking column spelled out. Both plots below use the same subset, so
# it is computed once.
dsmall <- slice_max(diamonds, order_by = z, n = 50)

# Scatterplot with a smoothed conditional means overlay (loess),
# carat on the x-axis and price on the y-axis.
ggplot(
  data = dsmall,
  aes(x = carat, y = price)
) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x)

# Same scatterplot, using 'glm' as the option for the smoothing.
ggplot(
  data = dsmall,
  aes(x = carat, y = price)
) +
  geom_point() +
  geom_smooth(method = "glm", formula = y ~ x)
# Boxplot of price per carat (y) for each cut (x).
diamonds %>%
  ggplot(mapping = aes(x = cut, y = price / carat)) +
  geom_boxplot()

# Same boxplot with the individual observations jittered on top.
diamonds %>%
  ggplot(mapping = aes(x = cut, y = price / carat)) +
  geom_boxplot() +
  geom_jitter()

# Jittered boxplot with alpha fixed at 0.4 via I(); the aesthetic is set at
# plot level, so it is inherited by both the boxplot and the jitter layer.
diamonds %>%
  ggplot(mapping = aes(x = cut, y = price / carat, alpha = I(0.4))) +
  geom_boxplot() +
  geom_jitter()
# Density of carat, one curve per diamond colour.
ggplot(diamonds, mapping = aes(x = carat, color = color)) +
  geom_density()

# Same density plot with alpha set to 0.3 through the inhibit function I().
ggplot(
  diamonds,
  mapping = aes(x = carat, color = color, alpha = I(0.3))
) +
  geom_density()
# mpg data: highway mileage (y) against engine displacement (x), coloured
# by manufacturer, with a linear model ('lm') fit as the smoother. The
# colour mapping is set at plot level, so it applies to the smoother too.
mpg %>%
  ggplot(mapping = aes(x = displ, y = hwy, colour = manufacturer)) +
  geom_point() +
  geom_smooth(formula = y ~ x, method = "lm")
# mtcars: horsepower (y) against miles per gallon (x), coloured by gear,
# with the title and all labels supplied in a single labs() call.
ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = hp, colour = gear)
) +
  geom_point() +
  labs(
    title = "Horsepower dependent on miles per gallon using different gears",
    x = "Miles per gallon",
    y = "Horsepower",
    colour = "Gear"
  )

# The same plot again, this time with the aesthetics declared up front and
# each label added through its own helper.
ggplot(mtcars, aes(x = mpg, y = hp, col = gear)) +
  geom_point() +
  xlab("Miles per gallon") +
  ylab("Horsepower") +
  labs(col = "Gear") +
  ggtitle("Horsepower dependent on miles per gallon using different gears")
# Subset the data to its numeric columns only.
# select(where(is.numeric)) is the current replacement for the superseded
# select_if() and keeps exactly the same columns.
part_data <- select(participants_data, where(is.numeric))

# Use 'cor' to compute the Pearson correlation matrix (its default method)
# and 'round' to reduce the correlation results to 1 decimal.
cormat <- round(cor(part_data), digits = 1)

# Use 'as.data.frame.table' to build a long table with one row per pair of
# variables (Var1, Var2) and the correlation in the `value` column.
melted_cormat <- as.data.frame.table(cormat, responseName = "value")

# Plot the result as a heatmap with 'geom_tile'.
ggplot(
  data = melted_cormat,
  aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile()
# Reshape the mpg data so that city and highway mileage share one `value`
# column, labelled by `variable` ("City" / "Highway"), next to `year`.
# Despite its name, `mpg_wide` holds the data in long format; the name is
# kept so any later code referring to it still works.
# tidyr::pivot_longer() (already loaded with the tidyverse) replaces
# reshape2::melt(), which needs the separate, retired reshape2 package.
mpg_wide <- mpg %>%
  select(year, City = cty, Highway = hwy) %>%
  pivot_longer(
    cols = c(City, Highway),
    names_to = "variable",
    values_to = "value"
  )

# Bar chart of mileage per year, with city and highway bars side by side.
efficiency_plot <- ggplot(data = mpg_wide, aes(x = as.factor(year))) +
  geom_col(aes(y = value, fill = variable), position = "dodge") +
  xlab("Year") +
  ylab("Miles per gallon") +
  labs(fill = "Environment") +
  ggtitle("Efficiency of Cars from 1999 and 2008")

# Write the chart to a 7 x 6 inch PNG at 300 dpi.
# ggsave() renders the plot object explicitly and closes the device itself;
# a bare ggplot() call between png() and dev.off() is only drawn when
# top-level auto-printing is active, so source()-ing the script would
# otherwise leave an empty image file.
ggsave(
  filename = "cars_efficiency.png",
  plot = efficiency_plot,
  width = 7,
  height = 6,
  units = "in",
  dpi = 300
)