forked from bixcop18/module2_R_biostats
-
Notifications
You must be signed in to change notification settings - Fork 0
/
R_history_04_30_pm.R
143 lines (143 loc) · 4.54 KB
/
R_history_04_30_pm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
ls # incorrect: calling a function without parenthesis is asking for its code!
ls() # lists the variables in your environment
length(myvec)
myvec
a
a + 5
myvec + c(1000, -6, 23.5, 12, -10)
myvec + c(1000, -6, 23.5, 12.35, -10.53456)
myvec + c(1000, -6, 23.5, 12.35, -10.53456) -> myvec
length(myvec)
myvec(1) # a vector of type numeric is NOT callable!
myvec[1] # slicing a vector
myvec[10:14] # slicing a vector, from index 10 to index 14, inclusive
# slicing creates a NEW vector
# we can get non-consecutive items:
myvec[10,14,11]
myvec[[10],[14],[11]]
myvec[c(10,14,11)]
# the above is a way to get non-consecutive elements from a vector
myvec[seq(1,length(myvec),2)] # taking elements from first to last, by steps of 2
myvec[c(1,3,5,7,9)]
# accessing out of bounds produces NA values!
length(myvec)
myvec[101]
myvec[c(101,56, -2)]
myvec[c(101, 56, 200)]
# BEWARE! Minus in square brackets filtering of a vector EXCLUDES some values
multiplier_vector[-3]
multiplier_vector[-c(1,3)]
## INTRODUCING LOGICAL FILTERS
myvec
multiplier_vector
multiplier_vector[-5] # silently ignores the fact there are fewer than 5 elements in this array
multiplier_vector
multiplier_vector[-1]
multiplier_vector[-3]
multiplier_vector[c(1,3)]
multiplier_vector[-c(1,3)] # all values BUT the ones at the indices mentioned in c(...)
# we want to extract from myvec all values larger than 50
myvec > 50
myvec[myvec > 50]
myvec[myvec > 50] # using a logical mask/filter/sieve to extract values responding "TRUE" to a certain test
length(myvec > 50) # count the number of values larger than 50
count(myvec > 50) # count the number of values larger than 50
?sum
sum(1,2,3)
sum(c(1,2,3,67,56))
sum(c(1,2,3,67,NA,56))
sum(c(1,2,3,67,NA,56),na.rm = TRUE)
sum(myvec > 50) # proper count of the number of values larger than 50
sum(1:5)
##
## DESCRIPTIVE STATS ON SAMPLE myvec
myvec
# sample mean
sum(myvec/myvec) # outputs 100
sum(myvec)/length(myvec)
sum(1:100)/100
mean(myvec)
# min and max give you the range of a vector
min(myvec)
max(myvec)
# let's prove that there is no value greater than 1096 in this vector:
myvec > 1096
sum(myvec > 1096)
# range is min and max combined
range(myvec)
is.na(myvec) # logical vector
sum(is.na(myvec)) # counting the number of NAs
?sort
sort(myvec)
myvec
# myvec has not been modified by sort()
sort(myvec)[(50+51)/2]
sort(myvec)-> sorted_vector
(sorted_vector[50] + sorted_vector[51]) / 2 # manual calculation of the sample median
median(myvec) # shorter way
summary(myvec) # very versatile, useful function
sum(myvec > 106.4) # counting points over the third quartile
summary(myvec) -> summary_object # everything is an object!
summary_object[5]
sum(myvec > summary_object[5]) # counting points over the third quartile
# named vectors are vectors with an extra attribute: names
# named vectors still DO HAVE indices
# so we have two ways to address or extract the elements in a named vector
summary_object["Median"]
summary_object[3] # same same!
str(summary_object)
str(summary_object)
names(summary_object) # calling the "names" attribute
attributes(summary_object)
# str() is a VERY IMPORTANT FUNCTION
str(the_false_object) # structure of a simple vector
str(sorted_vector)
head(sorted_vector)
tail(sorted_vector)
str(a)
?head
head(sorted_vector,n=10) # getting the 10 top values
?sort
## LOADING A BUILT-IN DATASET: mtcars
mtcars
iris
str(iris) # what is the structure of that dataset?
head(iris) # gets the first 6 observations
summary(iris) # variable-wise summary of the whole dta frame
# beware! length of a data frame == number of columns (variables)
length(iris)
dim(iris) # to get the proper number of rows and columns
nrow(iris) # just the number of rows
ncol(iris) # just the number of columns
colnames(iris)
rownames(iris)
rownames(mtcars)
rownames(iris) <- 'beautiful_flower'
rownames(iris)[12] <- 'beautiful_flower'
iris
head(iris, n=15)
rownames(iris)[1] <- 'beautiful_flower'
rownames(iris)[1]
# in a data frame, each column is a vector
# to extract from a data frame one of its column: "$" operator
str(iris)
iris$Petal.Length
iris$Petal.Length -> petal_lengths
is.vector(petal_lengths)
attributes(petal_lengths)
## MY FIRST GRAPHICS
# very simple plot with plot()
plot(petal_lengths)
range(petal_lengths)
iris$Species
plot(iris$Petal.Length, col=iris$Species)
# plotting with an argument for the colour
# first boxplots
boxplot(iris$Petal.Length)
median(iris$Petal.Length)
summary(iris$Petal.Length)
boxplot(Petal.Length ~ Species)
boxplot(Petal.Length ~ Species, data = iris)
?boxplot # to get help
boxplot(Petal.Length ~ Species, data = iris, col=c("blue","red","green"))
savehistory("R_history_04_30_pm.txt")