/
W2_assignment.R
41 lines (34 loc) · 1.7 KB
/
W2_assignment.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
## Import data ----
library(tidyverse)
# Read the partial green-space biodiversity dataset (path is relative to the
# current working directory).
green_space_data <- read_csv("Green_Space_Biodiversity_Partial.csv")
## Examine structure and check for problems in the dataset ----
# str(green_space_data)
# problems() only lists the parsing failures readr recorded while reading the
# file; it says nothing about missing values.
problems(green_space_data)
# Count missing values per column so that gaps in the data are checked
# explicitly rather than inferred from the absence of parsing problems.
colSums(is.na(green_space_data))
# Author's note from the original run: there were no obvious problems with the
# dataset, such as missing or incorrect information.
## Convert to long format ----
# Stack the `butterfly` and `bird` columns: each original row becomes two rows,
# with the former column name stored in `animal` and its value stored in
# `species.richness`.
green_space_long <- pivot_longer(
  green_space_data,
  cols = c(butterfly, bird),
  names_to = "animal",
  values_to = "species.richness"
)
## Examine data ----
# Convert the two grouping columns to factors so that summary() reports counts
# per level for them.
greenspaceTable <- green_space_long %>%
  mutate(across(c(site, animal), as.factor))
summary(greenspaceTable)
# Author's notes from the original run:
# - The summary table looks correct.
# - Each site has 2 data points (1 for bird and 1 for butterfly).
# - There are 15 bird and 15 butterfly data points, one per site (A–O).
## Visually check normality assumption ----
# Fit species richness as a function of animal group. The normality assumption
# is assessed on this model's residuals, which are extracted once and reused.
lm_green_space <- lm(species.richness ~ animal, data = greenspaceTable)
model_residuals <- residuals(lm_green_space)
# Normal Q-Q plot of the residuals with a reference line.
qqnorm(model_residuals, main = "")
qqline(model_residuals)
# Author's note: the dots visually look close to the line, so the residuals
# appear roughly normal.
# Shapiro-Wilk test, kept as a supplementary check only.
shapiro_result <- shapiro.test(model_residuals)
shapiro_result
# Author's note: the test was non-significant (p = 0.337) in the original run.
# A non-significant p-value is not, by itself, grounds to assume normality: it
# only means the test did not detect a departure. Judge the assumption from
# the residual plot and from how close the W statistic is to 1.
## 🙂 Reviewer note: concluding normality from a non-significant P value is a
## really good example of what you shouldn't do; if you have enough biological
## data, S-W will always be significant. Better to look at the residuals, or
## even the S-W statistic.
## Reviewer note: it's a little odd to test normality without looking at the
## areas, which could well be important.
## TODO: consider whether area should be included in the model.
## Save object as RDS file ----
# Serialise the factor-converted long-format table to the working directory;
# it can be restored with readRDS().
saveRDS(object = greenspaceTable, file = "greenspaceTable.rds")