forked from ImperialCollegeLondon/RCDS-basic-statistics
/
basic.r
64 lines (42 loc) · 1.25 KB
/
basic.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
### LOADING DATA

# Read the CSV file into a data frame.
# Note: assigning to `iris` masks R's built-in `iris` dataset for the session.
iris <- read.csv(file = "iris.csv")

# Print the structure of the table: its columns and their types
str(iris)

# Pull out one named column as a vector (auto-printed at top level)
iris$sepal.width
### DISPLAYING DATA

# Read both data sets used by the displays in this section
titanic <- read.csv(file = "titanic.csv")
iris <- read.csv(file = "iris.csv")

# Frequency table: number of rows in each `status` category
table(titanic$status)

# Bar chart of those same category counts
barplot(height = table(titanic$status))

# Histogram of petal length
hist(
  x = iris$petal.length,
  main = "My Histogram",
  xlab = "Petal length /cm"
)

# Scatter plot of petal width (y) against petal length (x)
plot(
  x = iris$petal.length,
  y = iris$petal.width,
  xlab = "Petal length /cm",
  ylab = "Petal width /cm"
)

# Overlay the fitted linear regression of width on length, drawn in red
abline(lm(iris$petal.width ~ iris$petal.length), col = "red")
### DESCRIPTIVE STATISTICS

# Arithmetic mean of sepal length
mean(x = iris$sepal.length)

# Median (middle value) of sepal length
median(x = iris$sepal.length)
# A function to get the mode (the most frequently occurring value) of `v`.
#
# v: a vector of values.
# Returns the value of `v` with the highest count. When several values tie,
# the one that appears first in `v` wins. An empty `v` gives an empty result.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Replace each element by the position of its value in `distinct_values`,
  # then count how many times each position occurs.
  positions <- match(v, distinct_values)
  counts <- table(positions)
  # `which.max` picks the first position holding the largest count.
  distinct_values[which.max(counts)]
}
# Most frequent value of the `status` column
getmode(titanic$status)

# Interquartile range of sepal length
IQR(x = iris$sepal.length)

# Horizontal box plot of sepal length
boxplot(
  iris$sepal.length,
  horizontal = TRUE,
  xlab = "Sepal length /cm"
)

# Sample variance of sepal length
var(x = iris$sepal.length)

# Sample standard deviation of sepal length
sd(x = iris$sepal.length)

# Pearson correlation between petal length and petal width
cor(x = iris$petal.length, y = iris$petal.width, method = "pearson")